We are going to use generalized additive models (GAMs) to estimate time-varying coefficients for climate variables in order to quantify their effects on malaria incidence by specie thought the time period of the study.
First, lets load some packages to handle the tasks we are going to do.
library(htmltools)
# Utilities
library(tictoc)
# Data loading
library(readr)
library(skimr)
# Data manipulation
library(tibble)
library(lubridate)
library(dplyr)
library(splitTools)
library(scales)
library(DT)
library(stringr)
# Time series
library(forecast)
# Plots
library(ggplot2)
ggplot2::theme_set(theme_bw())
ggplot2::theme_update(panel.grid = element_blank())
library(ggsci)
library(GGally)
library(DHARMa)
# GAM
library(mgcv)
library(gratia)
The cleaned up data can be found on the ~/data/processed/ directory on this repository. Define the file path to read the data.
processed_data_path <- file.path("../data/processed/")
file_name <- "pro_malaria-monthly-cases-district-loreto"
file <- paste(file_name, ".csv", sep = "")
file_path <- file.path(processed_data_path, file)
Create a variable containing the column names for the data.
col_names <- c(
"district",
"year",
"month",
"falciparum",
"vivax",
"aet",
"prcp",
"q",
"soilm",
"tmax",
"tmin",
"pop2015",
"province",
"region",
"dttm",
"time"
)
Define column types to optimize the parsing. We exclude soilm and region for the analysis.
# Column types
col_types <-
readr::cols_only(
district = col_character(),
year = col_integer(),
month = col_integer(),
falciparum = col_integer(),
vivax = col_integer(),
aet = col_double(),
prcp = col_double(),
q = col_double(),
tmax = col_double(),
tmin = col_double(),
pop2015 = col_integer(),
province = col_character(),
dttm = col_datetime(),
time = col_integer()
)
Reading csv. file into a data frame.
df_malaria_loreto <-
readr::read_csv(
file = file_path,
col_names = col_names,
col_types = col_types,
skip = 1,
locale = readr::locale(encoding = "UTF-8")
)
We are going to parse district, province, year and month as factors.
to_factor <- c("year", "month", "district", "province")
for (col in to_factor) {
df_malaria_loreto[[col]] <- factor(df_malaria_loreto[[col]])
}
Check data structure.
str(df_malaria_loreto)
## tibble [10,584 x 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ district : Factor w/ 49 levels "ALTO NANAY","ALTO TAPICHE",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ year : Factor w/ 18 levels "2000","2001",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ month : Factor w/ 12 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ falciparum: int [1:10584] 2 2 2 0 3 1 2 11 6 12 ...
## $ vivax : int [1:10584] 42 40 21 3 15 0 4 1 2 4 ...
## $ aet : num [1:10584] 90.8 82.1 66.8 61.8 70 ...
## $ prcp : num [1:10584] 227 278 358 334 380 ...
## $ q : num [1:10584] 136 196 291 272 310 ...
## $ tmax : num [1:10584] 31 31 30.4 30 30 ...
## $ tmin : num [1:10584] 21.7 21.5 21.3 20.8 21.3 ...
## $ pop2015 : int [1:10584] 2784 2784 2784 2784 2784 2784 2784 2784 2784 2784 ...
## $ province : Factor w/ 8 levels "ALTO AMAZONAS",..: 5 5 5 5 5 5 5 5 5 5 ...
## $ dttm : POSIXct[1:10584], format: "2000-01-01" "2000-02-01" ...
## $ time : int [1:10584] 946684800 949363200 951868800 954547200 957139200 959817600 962409600 965088000 967766400 970358400 ...
## - attr(*, "spec")=
## .. cols_only(
## .. district = col_character(),
## .. year = col_integer(),
## .. month = col_integer(),
## .. falciparum = col_integer(),
## .. vivax = col_integer(),
## .. aet = col_double(),
## .. prcp = col_double(),
## .. q = col_double(),
## .. soilm = col_skip(),
## .. tmax = col_double(),
## .. tmin = col_double(),
## .. pop2015 = col_integer(),
## .. province = col_character(),
## .. region = col_skip(),
## .. dttm = col_datetime(format = ""),
## .. time = col_integer()
## .. )
To display a summary of the data, we first define a variable containing a table with summary statistics for each data type in the data frame using the skim function in the skimr package.
skim_malaria <- skimr::skim(df_malaria_loreto)
Then, we display the summary tables for each data type using the yank function of the same package:
skimr::yank(skim_malaria, "factor")
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| district | 0 | 1 | FALSE | 49 | ALT: 216, ALT: 216, BAL: 216, BAR: 216 |
| year | 0 | 1 | FALSE | 18 | 200: 588, 200: 588, 200: 588, 200: 588 |
| month | 0 | 1 | FALSE | 12 | 1: 882, 2: 882, 3: 882, 4: 882 |
| province | 0 | 1 | FALSE | 8 | MAY: 2376, REQ: 2376, ALT: 1296, UCA: 1296 |
skimr::yank(skim_malaria, "numeric")
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| falciparum | 0 | 1 | 1.576000e+01 | 50.88 | 0.00 | 0.000000e+00 | 0.000000e+00 | 1.000000e+01 | 1.240000e+03 | ▇▁▁▁▁ |
| vivax | 0 | 1 | 5.018000e+01 | 121.77 | 0.00 | 0.000000e+00 | 8.000000e+00 | 4.400000e+01 | 1.936000e+03 | ▇▁▁▁▁ |
| aet | 0 | 1 | 8.314000e+01 | 11.78 | 42.22 | 7.434000e+01 | 8.204000e+01 | 9.163000e+01 | 1.183400e+02 | ▁▃▇▅▁ |
| prcp | 0 | 1 | 2.229800e+02 | 112.00 | 14.13 | 1.445600e+02 | 2.041500e+02 | 2.807300e+02 | 8.110900e+02 | ▆▇▂▁▁ |
| q | 0 | 1 | 1.398400e+02 | 113.94 | 0.71 | 5.249000e+01 | 1.195000e+02 | 1.990600e+02 | 7.390900e+02 | ▇▅▁▁▁ |
| tmax | 0 | 1 | 3.143000e+01 | 1.12 | 26.90 | 3.070000e+01 | 3.144000e+01 | 3.217000e+01 | 3.514000e+01 | ▁▂▇▅▁ |
| tmin | 0 | 1 | 2.141000e+01 | 0.83 | 17.78 | 2.092000e+01 | 2.151000e+01 | 2.195000e+01 | 2.501000e+01 | ▁▂▇▂▁ |
| pop2015 | 0 | 1 | 2.121167e+04 | 32481.69 | 690.00 | 6.017000e+03 | 1.074500e+04 | 1.706100e+04 | 1.546960e+05 | ▇▁▁▁▁ |
| time | 0 | 1 | 1.229362e+09 | 163983470.51 | 946684800.00 | 1.087992e+09 | 1.229429e+09 | 1.370693e+09 | 1.512086e+09 | ▇▇▇▇▇ |
skimr::yank(skim_malaria, "POSIXct")
Variable type: POSIXct
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| dttm | 0 | 1 | 2000-01-01 | 2017-12-01 | 2008-12-16 12:00:00 | 216 |
Create a copy of the data frame where we are going to transform and create some new columns.
dat <- tibble::tibble(df_malaria_loreto)
First, we are going to scale all climate variables from 0 to 1. The aim of this step is to avoid numerical problems in the GAM fitting procedures, and also to have a common scale on the coefficients.
to_scale <- c("aet", "prcp", "q", "soilm", "tmax", "tmin")
for (col in to_scale) {
dat[[col]] <- scales::rescale(df_malaria_loreto[[col]], to = c(0, 1))
}
Lagged variables are going to be used to build distributed lag models (DLM). In order to properly create lagged variables for this data set, first we are going to group the data by districts. Next, we arrange the data in each group by date. Finally, la lag function of the package dplyr is applied to create lagged variables by 1, 3, 6, and 12 months.
dat <- dat %>%
dplyr::group_by(district) %>%
dplyr::arrange(dttm) %>%
dplyr::mutate(
aet_lag1 = dplyr::lag(aet, n = 1L),
aet_lag3 = dplyr::lag(aet, n = 3L),
aet_lag6 = dplyr::lag(aet, n = 6L),
aet_lag12 = dplyr::lag(aet, n = 12L),
prcp_lag1 = dplyr::lag(prcp, n = 1L),
prcp_lag3 = dplyr::lag(prcp, n = 3L),
prcp_lag6 = dplyr::lag(prcp, n = 6L),
prcp_lag12 = dplyr::lag(prcp, n = 12L),
q_lag1 = dplyr::lag(q, n = 1L),
q_lag3 = dplyr::lag(q, n = 3L),
q_lag6 = dplyr::lag(q, n = 6L),
q_lag12 = dplyr::lag(q, n = 12L),
soilm_lag1 = dplyr::lag(q, n = 1L),
soilm_lag3 = dplyr::lag(q, n = 3L),
soilm_lag6 = dplyr::lag(q, n = 6L),
soilm_lag12 = dplyr::lag(q, n = 12L),
tmax_lag1 = dplyr::lag(tmax, n = 1L),
tmax_lag3 = dplyr::lag(tmax, n = 3L),
tmax_lag6 = dplyr::lag(tmax, n = 6L),
tmax_lag12 = dplyr::lag(tmax, n = 12L),
tmin_lag1 = dplyr::lag(tmin, n = 1L),
tmin_lag3 = dplyr::lag(tmin, n = 3L),
tmin_lag6 = dplyr::lag(tmin, n = 6L),
tmin_lag12 = dplyr::lag(tmin, n = 12L)
)
dat
## # A tibble: 10,584 x 38
## # Groups: district [49]
## district year month falciparum vivax aet prcp q tmax tmin pop2015
## <fct> <fct> <fct> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 ALTO NA~ 2000 1 2 42 0.638 0.267 0.184 0.501 0.540 2784
## 2 ALTO TA~ 2000 1 1 0 0.578 0.194 0.111 0.527 0.419 2106
## 3 BALSAPU~ 2000 1 0 9 0.602 0.178 0.0907 0.305 0.285 17436
## 4 BARRANCA 2000 1 9 9 0.643 0.187 0.0965 0.318 0.319 13608
## 5 BELEN 2000 1 3 68 0.581 0.302 0.227 0.516 0.520 75685
## 6 CAHUAPA~ 2000 1 0 2 0.635 0.189 0.0995 0.355 0.389 8331
## 7 CAPELO 2000 1 0 0 0.568 0.240 0.161 0.584 0.513 4454
## 8 CONTAMA~ 2000 1 0 0 0.672 0.178 0.0833 0.597 0.352 27273
## 9 EMILIO ~ 2000 1 0 2 0.596 0.210 0.127 0.594 0.484 7488
## 10 FERNAND~ 2000 1 2 17 0.559 0.292 0.218 0.501 0.504 20225
## # ... with 10,574 more rows, and 27 more variables: province <fct>,
## # dttm <dttm>, time <int>, aet_lag1 <dbl>, aet_lag3 <dbl>, aet_lag6 <dbl>,
## # aet_lag12 <dbl>, prcp_lag1 <dbl>, prcp_lag3 <dbl>, prcp_lag6 <dbl>,
## # prcp_lag12 <dbl>, q_lag1 <dbl>, q_lag3 <dbl>, q_lag6 <dbl>, q_lag12 <dbl>,
## # soilm_lag1 <dbl>, soilm_lag3 <dbl>, soilm_lag6 <dbl>, soilm_lag12 <dbl>,
## # tmax_lag1 <dbl>, tmax_lag3 <dbl>, tmax_lag6 <dbl>, tmax_lag12 <dbl>,
## # tmin_lag1 <dbl>, tmin_lag3 <dbl>, tmin_lag6 <dbl>, tmin_lag12 <dbl>
Since NA values are going to be present for the first 12 months (because of the 12 month lagged variables), we have to do a filter.
dat <- na.omit(dat)
Finally, we create a column containing the values of the districts’ population on the natural logarithmic scale.
dat$ln_pop2015 <- log(dat$pop2015)
Display the final data frame for checking.
dat
## # A tibble: 9,996 x 39
## # Groups: district [49]
## district year month falciparum vivax aet prcp q tmax tmin pop2015
## <fct> <fct> <fct> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 ALTO NA~ 2001 1 6 2 0.214 0.414 0.386 0.387 0.411 2784
## 2 ALTO TA~ 2001 1 0 0 0.295 0.217 0.164 0.495 0.382 2106
## 3 BALSAPU~ 2001 1 39 43 0.359 0.153 0.0896 0.285 0.262 17436
## 4 BARRANCA 2001 1 9 21 0.362 0.180 0.118 0.296 0.295 13608
## 5 BELEN 2001 1 1 37 0.163 0.498 0.481 0.408 0.397 75685
## 6 CAHUAPA~ 2001 1 27 2 0.351 0.167 0.105 0.328 0.359 8331
## 7 CAPELO 2001 1 0 0 0.200 0.301 0.265 0.509 0.428 4454
## 8 CONTAMA~ 2001 1 0 0 0.523 0.183 0.104 0.609 0.366 27273
## 9 EMILIO ~ 2001 1 2 3 0.285 0.228 0.177 0.551 0.436 7488
## 10 FERNAND~ 2001 1 1 9 0.162 0.471 0.452 0.404 0.393 20225
## # ... with 9,986 more rows, and 28 more variables: province <fct>, dttm <dttm>,
## # time <int>, aet_lag1 <dbl>, aet_lag3 <dbl>, aet_lag6 <dbl>,
## # aet_lag12 <dbl>, prcp_lag1 <dbl>, prcp_lag3 <dbl>, prcp_lag6 <dbl>,
## # prcp_lag12 <dbl>, q_lag1 <dbl>, q_lag3 <dbl>, q_lag6 <dbl>, q_lag12 <dbl>,
## # soilm_lag1 <dbl>, soilm_lag3 <dbl>, soilm_lag6 <dbl>, soilm_lag12 <dbl>,
## # tmax_lag1 <dbl>, tmax_lag3 <dbl>, tmax_lag6 <dbl>, tmax_lag12 <dbl>,
## # tmin_lag1 <dbl>, tmin_lag3 <dbl>, tmin_lag6 <dbl>, tmin_lag12 <dbl>,
## # ln_pop2015 <dbl>
# set.seed(20210223)
#
# indices <- splitTools::partition(
# y = dat$district,
# type = "stratified",
# p = c(train = 0.88, test = 0.12)
# )
#
# train <- dat[indices$train, ]
# test <- dat[indices$test, ]
# get_oos_dev <- function(model, prediction, y, weights = NULL) {
#
# if(is.null(weights)) weights <- rep(1, times= length(y))
#
# dev_residuals <- model$family$dev.resids(y, prediction, weights)
# deviance <- sum(dev_residuals)
# deviance
# }
Predictors: aet, prcp, q, tmax, tmin
tictoc::tic("GAM model fitting")
vivax_tvcm_0 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = prcp, bs = "tp", k = 30) +
s(time, by = q, bs = "tp", k = 30) +
s(time, by = tmax, bs = "tp", k = 100) +
s(time, by = tmin, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 3.77 sec elapsed
cat("\nConverged?", vivax_tvcm_0$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_0)
##
## Family: Negative Binomial(0.908)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp,
## bs = "tp", k = 30) + s(time, by = q, bs = "tp", k = 30) +
## s(time, by = tmax, bs = "tp", k = 100) + s(time, by = tmin,
## bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.1149 0.3371 -18.14 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.827 48.000 280.201 < 2e-16 ***
## s(time):aet 17.389 20.839 6.624 < 2e-16 ***
## s(time):prcp 2.003 2.004 1.171 0.310036
## s(time):q 5.688 6.817 3.991 0.000315 ***
## s(time):tmax 62.098 74.336 11.439 < 2e-16 ***
## s(time):tmin 19.869 24.080 13.872 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.456 Deviance explained = 70.1%
## fREML = 14811 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_0, type = "deviance", pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] -3.367162e-06 -3.355952e-06 -2.271658e-04 -9.735585e-04 -3.874418e-05
## [6] -3.720399e-06
##
## $hess
## [,1] [,2] [,3] [,4] [,5]
## [1,] 2.337996e+01 -3.006481e-02 3.191238e-06 0.0003387835 -3.216310e-01
## [2,] -3.006481e-02 2.351686e+00 8.253129e-07 0.0162119750 4.608871e-01
## [3,] 3.191238e-06 8.253129e-07 2.277589e-04 0.0009991606 3.471305e-05
## [4,] 3.387835e-04 1.621198e-02 9.991606e-04 1.4730925139 3.036194e-02
## [5,] -3.216310e-01 4.608871e-01 3.471305e-05 0.0303619402 1.624212e+01
## [6,] -1.518762e-02 4.888146e-01 2.586573e-06 -0.0086091634 8.189517e-01
## [,6]
## [1,] -1.518762e-02
## [2,] 4.888146e-01
## [3,] 2.586573e-06
## [4,] -8.609163e-03
## [5,] 8.189517e-01
## [6,] 2.299578e+00
##
## Model rank = 360 / 360
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.83 NA NA
## s(time):aet 50.00 17.39 0.87 0.21
## s(time):prcp 30.00 2.00 0.87 0.24
## s(time):q 30.00 5.69 0.87 0.20
## s(time):tmax 100.00 62.10 0.87 0.18
## s(time):tmin 100.00 19.87 0.87 0.14
tibble::tibble(
x = lubridate::as_datetime(dat$time),
y = vivax_tvcm_0$residuals
) %>%
ggplot(aes(x = x, y = y)) +
geom_point() +
scale_x_datetime(date_labels = "%Y", date_breaks = "1 year") +
labs(x = NULL, y = "Deviance residuals")
mgcv::plot.gam(
x = vivax_tvcm_0,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(vivax_tvcm_0)
## para s(district) s(time):aet s(time):prcp s(time):q s(time):tmax
## worst 1 1.00000000 0.9991857 0.9999320 0.9998684 0.9965358
## observed 1 0.39902337 0.9940252 0.9990555 0.9983211 0.9813848
## estimate 1 0.06455737 0.9933047 0.9991923 0.9986086 0.9787407
## s(time):tmin
## worst 0.9945440
## observed 0.9765045
## estimate 0.9821762
mgcv::concurvity(vivax_tvcm_0, full = FALSE)$estimate
## para s(district) s(time):aet s(time):prcp s(time):q
## para 1.0000000 0.02040816 0.3277867 0.2939797 0.2346176
## s(district) 1.0000000 1.00000000 0.3289483 0.3074769 0.2607852
## s(time):aet 0.9668224 0.01992532 1.0000000 0.8019478 0.6666345
## s(time):prcp 0.8070714 0.01738709 0.7471841 1.0000000 0.9638060
## s(time):q 0.6399314 0.01471070 0.5604360 0.9621734 1.0000000
## s(time):tmax 0.9593538 0.02031868 0.9563570 0.8155472 0.7073408
## s(time):tmin 0.9692789 0.02029111 0.9515915 0.8769878 0.7818448
## s(time):tmax s(time):tmin
## para 0.3253505 0.3294669
## s(district) 0.3346232 0.3352775
## s(time):aet 0.9495592 0.9387231
## s(time):prcp 0.7652513 0.8520376
## s(time):q 0.5965255 0.7094315
## s(time):tmax 1.0000000 0.9514297
## s(time):tmin 0.9488907 1.0000000
tictoc::tic("GAM model fitting")
falciparum_tvcm_0 <- mgcv::bam(
formula =
falciparum ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = prcp, bs = "tp", k = 30) +
s(time, by = q, bs = "tp", k = 30) +
s(time, by = tmax, bs = "tp", k = 100) +
s(time, by = tmin, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 3.31 sec elapsed
cat("\nConverged?", falciparum_tvcm_0$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_0)
##
## Family: Negative Binomial(0.529)
## Link function: log
##
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp,
## bs = "tp", k = 30) + s(time, by = q, bs = "tp", k = 30) +
## s(time, by = tmax, bs = "tp", k = 100) + s(time, by = tmin,
## bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.4264 0.4105 -18.09 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.546 49.000 171.324 <2e-16 ***
## s(time):aet 7.192 8.605 10.225 <2e-16 ***
## s(time):prcp 3.239 3.658 2.292 0.0620 .
## s(time):q 5.836 6.952 2.613 0.0114 *
## s(time):tmax 59.062 70.945 6.070 <2e-16 ***
## s(time):tmin 21.449 26.016 7.307 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.403 Deviance explained = 66.5%
## fREML = 13772 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_0, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] -3.518868e-08 -1.443560e-08 -2.223056e-08 -5.257530e-08 -8.094957e-07
## [6] -3.101533e-07
##
## $hess
## [,1] [,2] [,3] [,4] [,5]
## [1,] 22.6358899730 -0.01350932 -0.0009755681 -0.001422401 -0.28478750
## [2,] -0.0135093222 1.89531300 0.0157292065 -0.069450446 -0.06723019
## [3,] -0.0009755681 0.01572921 0.1152797305 0.069492242 -0.00136545
## [4,] -0.0014224011 -0.06945045 0.0694922418 0.985540319 -0.02624397
## [5,] -0.2847875031 -0.06723019 -0.0013654498 -0.026243973 11.84634291
## [6,] -0.0623835530 0.04806034 -0.0059423719 -0.005151761 -0.41295191
## [,6]
## [1,] -0.062383553
## [2,] 0.048060343
## [3,] -0.005942372
## [4,] -0.005151761
## [5,] -0.412951912
## [6,] 2.769777054
##
## Model rank = 360 / 360
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.55 NA NA
## s(time):aet 50.00 7.19 0.85 0.94
## s(time):prcp 30.00 3.24 0.85 0.94
## s(time):q 30.00 5.84 0.85 0.94
## s(time):tmax 100.00 59.06 0.85 0.95
## s(time):tmin 100.00 21.45 0.85 0.95
mgcv::plot.gam(
x = falciparum_tvcm_0,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(falciparum_tvcm_0)
## para s(district) s(time):aet s(time):prcp s(time):q s(time):tmax
## worst 1 1.00000000 0.9991857 0.9999320 0.9998684 0.9965358
## observed 1 0.39611215 0.9931296 0.9991574 0.9986375 0.9796553
## estimate 1 0.06455737 0.9933039 0.9991922 0.9986085 0.9787434
## s(time):tmin
## worst 0.9945440
## observed 0.9772649
## estimate 0.9821782
mgcv::concurvity(falciparum_tvcm_0, full = FALSE)$estimate
## para s(district) s(time):aet s(time):prcp s(time):q
## para 1.0000000 0.02040816 0.3279674 0.2942235 0.2348166
## s(district) 1.0000000 1.00000000 0.3291297 0.3077350 0.2610108
## s(time):aet 0.9668224 0.01992532 1.0000000 0.8019210 0.6666026
## s(time):prcp 0.8070714 0.01738709 0.7471705 1.0000000 0.9638083
## s(time):q 0.6399314 0.01471070 0.5604213 0.9621755 1.0000000
## s(time):tmax 0.9593538 0.02031868 0.9563535 0.8155208 0.7073092
## s(time):tmin 0.9692789 0.02029111 0.9515887 0.8769723 0.7818254
## s(time):tmax s(time):tmin
## para 0.3254775 0.3295965
## s(district) 0.3347548 0.3354092
## s(time):aet 0.9495558 0.9387202
## s(time):prcp 0.7652405 0.8520308
## s(time):q 0.5965152 0.7094251
## s(time):tmax 1.0000000 0.9514276
## s(time):tmin 0.9488885 1.0000000
Predictors: aet, q, tmax, tmin
tictoc::tic("GAM model fitting")
vivax_tvcm_1 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = q, bs = "tp", k = 30) +
s(time, by = tmax, bs = "tp", k = 100) +
s(time, by = tmin, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.79 sec elapsed
cat("\nConverged?", vivax_tvcm_1$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_1)
##
## Family: Negative Binomial(0.908)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = aet, bs = "tp", k = 50) + s(time, by = q, bs = "tp",
## k = 30) + s(time, by = tmax, bs = "tp", k = 100) + s(time,
## by = tmin, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.0633 0.3339 -18.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.827 49.000 274.521 < 2e-16 ***
## s(time):aet 17.432 20.890 8.455 < 2e-16 ***
## s(time):q 5.683 6.811 5.561 3.63e-06 ***
## s(time):tmax 62.138 74.384 11.504 < 2e-16 ***
## s(time):tmin 19.933 24.158 13.831 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.455 Deviance explained = 70.1%
## fREML = 14814 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_1, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] -3.774402e-08 -2.406240e-07 -2.981255e-08 -6.683805e-07 -1.951544e-07
##
## $hess
## [,1] [,2] [,3] [,4] [,5]
## [1,] 23.3806467951 -0.03435635 0.0006304787 -0.32306655 -0.015678796
## [2,] -0.0343563451 2.41288040 0.0145859015 0.43709118 0.482079442
## [3,] 0.0006304787 0.01458590 1.4569708243 0.02741787 -0.007551268
## [4,] -0.3230665489 0.43709118 0.0274178667 16.20079550 0.836400811
## [5,] -0.0156787958 0.48207944 -0.0075512676 0.83640081 2.245322803
##
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.83 NA NA
## s(time):aet 50.00 17.43 0.84 0.005 **
## s(time):q 30.00 5.68 0.84 <2e-16 ***
## s(time):tmax 100.00 62.14 0.84 0.005 **
## s(time):tmin 100.00 19.93 0.84 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mgcv::plot.gam(
x = vivax_tvcm_1,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(vivax_tvcm_1)
## para s(district) s(time):aet s(time):q s(time):tmax s(time):tmin
## worst 1 1.00000000 0.9873856 0.9144782 0.9964242 0.9941011
## observed 1 0.39435693 0.9740345 0.8434204 0.9805507 0.9755251
## estimate 1 0.06325033 0.9758498 0.8481380 0.9778193 0.9814017
mgcv::concurvity(vivax_tvcm_1, full = FALSE)$estimate
## para s(district) s(time):aet s(time):q s(time):tmax
## para 1.0000000 0.02040816 0.3265168 0.2335104 0.3241157
## s(district) 1.0000000 1.00000000 0.3276732 0.2595149 0.3333457
## s(time):aet 0.9668224 0.01992532 1.0000000 0.6668459 0.9495827
## s(time):q 0.6399314 0.01471070 0.5604452 1.0000000 0.5965317
## s(time):tmax 0.9593538 0.02031868 0.9563805 0.7075603 1.0000000
## s(time):tmin 0.9692789 0.02029111 0.9516124 0.7819756 0.9489026
## s(time):tmin
## para 0.3282028
## s(district) 0.3339925
## s(time):aet 0.9387457
## s(time):q 0.7094209
## s(time):tmax 0.9514415
## s(time):tmin 1.0000000
tictoc::tic("GAM model fitting")
falciparum_tvcm_1 <- mgcv::bam(
formula =
falciparum ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = q, bs = "tp", k = 30) +
s(time, by = tmax, bs = "tp", k = 100) +
s(time, by = tmin, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.89 sec elapsed
cat("\nConverged?", falciparum_tvcm_1$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_1)
##
## Family: Negative Binomial(0.529)
## Link function: log
##
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = aet, bs = "tp", k = 50) + s(time, by = q, bs = "tp",
## k = 30) + s(time, by = tmax, bs = "tp", k = 100) + s(time,
## by = tmin, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.4018 0.4065 -18.21 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.545 49.000 171.291 <2e-16 ***
## s(time):aet 7.191 8.607 11.491 <2e-16 ***
## s(time):q 6.664 8.016 8.034 <2e-16 ***
## s(time):tmax 59.697 71.621 6.130 <2e-16 ***
## s(time):tmin 21.667 26.279 7.102 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.402 Deviance explained = 66.5%
## fREML = 13778 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_1, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] -3.953605e-08 -5.510486e-09 -5.903285e-08 -8.373566e-07 -3.139063e-07
##
## $hess
## [,1] [,2] [,3] [,4] [,5]
## [1,] 22.635798175 -0.01344865 -0.002555688 -0.2905029 -0.06355166
## [2,] -0.013448647 1.98098897 -0.038133451 -0.0622132 0.03828143
## [3,] -0.002555688 -0.03813345 1.375141206 -0.0624381 -0.02176061
## [4,] -0.290502947 -0.06221320 -0.062438105 12.0142663 -0.40130340
## [5,] -0.063551662 0.03828143 -0.021760608 -0.4013034 2.84890650
##
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.55 NA NA
## s(time):aet 50.00 7.19 0.84 0.91
## s(time):q 30.00 6.66 0.84 0.93
## s(time):tmax 100.00 59.70 0.84 0.88
## s(time):tmin 100.00 21.67 0.84 0.88
mgcv::plot.gam(
x = falciparum_tvcm_1,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(falciparum_tvcm_1)
## para s(district) s(time):aet s(time):q s(time):tmax s(time):tmin
## worst 1 1.00000000 0.9873856 0.9144782 0.9964242 0.9941011
## observed 1 0.39339010 0.9676825 0.8359675 0.9786208 0.9761656
## estimate 1 0.06325033 0.9758953 0.8483709 0.9779531 0.9814996
mgcv::concurvity(falciparum_tvcm_1, full = FALSE)$estimate
## para s(district) s(time):aet s(time):q s(time):tmax
## para 1.0000000 0.02040816 0.3314763 0.2373774 0.3294197
## s(district) 1.0000000 1.00000000 0.3326532 0.2639145 0.3388293
## s(time):aet 0.9668224 0.01992532 1.0000000 0.6663847 0.9494902
## s(time):q 0.6399314 0.01471070 0.5605638 1.0000000 0.5965780
## s(time):tmax 0.9593538 0.02031868 0.9563137 0.7071295 1.0000000
## s(time):tmin 0.9692789 0.02029111 0.9515541 0.7817192 0.9488734
## s(time):tmin
## para 0.3336369
## s(district) 0.3395147
## s(time):aet 0.9386541
## s(time):q 0.7095184
## s(time):tmax 0.9514141
## s(time):tmin 1.0000000
Predictors: aet, prcp, tmax, tmin
tictoc::tic("GAM model fitting")
vivax_tvcm_2 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = prcp, bs = "tp", k = 30) +
s(time, by = tmax, bs = "tp", k = 100) +
s(time, by = tmin, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.78 sec elapsed
cat("\nConverged?", vivax_tvcm_2$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_2)
##
## Family: Negative Binomial(0.908)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp,
## bs = "tp", k = 30) + s(time, by = tmax, bs = "tp", k = 100) +
## s(time, by = tmin, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.0490 0.3344 -18.09 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.83 49.00 274.419 < 2e-16 ***
## s(time):aet 17.52 20.99 8.394 < 2e-16 ***
## s(time):prcp 5.56 6.66 5.299 9.56e-06 ***
## s(time):tmax 62.16 74.41 11.521 < 2e-16 ***
## s(time):tmin 19.89 24.10 13.747 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.455 Deviance explained = 70.1%
## fREML = 14814 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_2, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] -4.705703e-08 -2.966713e-07 -3.397138e-08 -8.221233e-07 -2.393096e-07
##
## $hess
## [,1] [,2] [,3] [,4] [,5]
## [1,] 23.380150632 -0.03461292 0.001477193 -0.32273648 -0.015300875
## [2,] -0.034612920 2.44629002 0.011640769 0.43097790 0.479687452
## [3,] 0.001477193 0.01164077 1.434720546 0.02860737 -0.008259271
## [4,] -0.322736479 0.43097790 0.028607374 16.20117095 0.840785332
## [5,] -0.015300875 0.47968745 -0.008259271 0.84078533 2.208557963
##
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.83 NA NA
## s(time):aet 50.00 17.52 0.89 0.66
## s(time):prcp 30.00 5.56 0.89 0.64
## s(time):tmax 100.00 62.16 0.89 0.66
## s(time):tmin 100.00 19.89 0.89 0.74
mgcv::plot.gam(
x = vivax_tvcm_2,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(vivax_tvcm_2)
## para s(district) s(time):aet s(time):prcp s(time):tmax s(time):tmin
## worst 1 1.00000000 0.9869412 0.9622823 0.9964367 0.9941330
## observed 1 0.39371642 0.9746461 0.9123273 0.9806178 0.9755543
## estimate 1 0.06319163 0.9755038 0.9124919 0.9778706 0.9814604
mgcv::concurvity(vivax_tvcm_2, full = FALSE)$estimate
## para s(district) s(time):aet s(time):prcp s(time):tmax
## para 1.0000000 0.02040816 0.3263912 0.2923165 0.3242324
## s(district) 1.0000000 1.00000000 0.3275472 0.3057018 0.3334647
## s(time):aet 0.9668224 0.01992532 1.0000000 0.8022260 0.9495872
## s(time):prcp 0.8070714 0.01738709 0.7472882 1.0000000 0.7653253
## s(time):tmax 0.9593538 0.02031868 0.9563895 0.8158516 1.0000000
## s(time):tmin 0.9692789 0.02029111 0.9516214 0.8771643 0.9489120
## s(time):tmin
## para 0.3283258
## s(district) 0.3341169
## s(time):aet 0.9387486
## s(time):prcp 0.8520830
## s(time):tmax 0.9514512
## s(time):tmin 1.0000000
tictoc::tic("GAM model fitting")
falciparum_tvcm_2 <- mgcv::bam(
formula =
falciparum ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = prcp, bs = "tp", k = 30) +
s(time, by = tmax, bs = "tp", k = 100) +
s(time, by = tmin, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.58 sec elapsed
cat("\nConverged?", falciparum_tvcm_2$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_2)
##
## Family: Negative Binomial(0.529)
## Link function: log
##
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp,
## bs = "tp", k = 30) + s(time, by = tmax, bs = "tp", k = 100) +
## s(time, by = tmin, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.3721 0.4074 -18.09 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.545 48.000 174.727 <2e-16 ***
## s(time):aet 6.995 8.366 10.820 <2e-16 ***
## s(time):prcp 6.564 7.895 7.666 <2e-16 ***
## s(time):tmax 59.956 71.885 6.137 <2e-16 ***
## s(time):tmin 21.784 26.419 7.077 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.404 Deviance explained = 66.5%
## fREML = 13778 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_2, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] -2.180887e-08 -2.163537e-09 -3.442661e-08 -4.918663e-07 -1.809657e-07
##
## $hess
## [,1] [,2] [,3] [,4] [,5]
## [1,] 22.633317911 -0.01330178 -0.003626106 -0.28520792 -0.06332660
## [2,] -0.013301776 1.92861198 0.013414445 -0.07817084 0.03587574
## [3,] -0.003626106 0.01341444 1.274784052 -0.10945297 -0.03129743
## [4,] -0.285207915 -0.07817084 -0.109452972 11.99518120 -0.38841947
## [5,] -0.063326595 0.03587574 -0.031297435 -0.38841947 2.84976050
##
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.54 NA NA
## s(time):aet 50.00 7.00 0.82 0.22
## s(time):prcp 30.00 6.56 0.82 0.20
## s(time):tmax 100.00 59.96 0.82 0.22
## s(time):tmin 100.00 21.78 0.82 0.20
mgcv::plot.gam(
x = falciparum_tvcm_2,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(falciparum_tvcm_2)
## para s(district) s(time):aet s(time):prcp s(time):tmax s(time):tmin
## worst 1 1.00000000 0.9869412 0.9622823 0.9964367 0.9941330
## observed 1 0.39272003 0.9698421 0.9061720 0.9786591 0.9762106
## estimate 1 0.06319163 0.9755316 0.9125252 0.9779537 0.9815205
mgcv::concurvity(falciparum_tvcm_2, full = FALSE)$estimate
## para s(district) s(time):aet s(time):prcp s(time):tmax
## para 1.0000000 0.02040816 0.3297986 0.2957433 0.3276114
## s(district) 1.0000000 1.00000000 0.3309685 0.3093380 0.3369594
## s(time):aet 0.9668224 0.01992532 1.0000000 0.8018830 0.9495236
## s(time):prcp 0.8070714 0.01738709 0.7472437 1.0000000 0.7652447
## s(time):tmax 0.9593538 0.02031868 0.9563369 0.8155077 1.0000000
## s(time):tmin 0.9692789 0.02029111 0.9515749 0.8769689 0.9488847
## s(time):tmin
## para 0.3317847
## s(district) 0.3376325
## s(time):aet 0.9386872
## s(time):prcp 0.8520404
## s(time):tmax 0.9514248
## s(time):tmin 1.0000000
Predictors: aet, q, tmin
tictoc::tic("GAM model fitting")
vivax_tvcm_3 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = q, bs = "tp", k = 30) +
s(time, by = tmin, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.02 sec elapsed
cat("\nConverged?", vivax_tvcm_3$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_3)
##
## Family: Negative Binomial(0.84)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = aet, bs = "tp", k = 50) + s(time, by = q, bs = "tp",
## k = 30) + s(time, by = tmin, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.6287 0.3369 -19.68 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.844 48.000 307.252 < 2e-16 ***
## s(time):aet 5.612 6.652 4.655 5.75e-05 ***
## s(time):q 5.807 6.972 4.861 1.93e-05 ***
## s(time):tmin 67.408 80.119 9.212 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.495 Deviance explained = 67.8%
## fREML = 14797 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_3, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 2.866972e-09 2.097608e-08 1.641944e-08 1.491855e-07
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 23.514289047 -0.003531395 -0.002593209 -0.08947966
## [2,] -0.003531395 1.606928284 -0.010680700 -0.15476745
## [3,] -0.002593209 -0.010680700 1.023805126 0.04366450
## [4,] -0.089479657 -0.154767453 0.043664498 22.30828296
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.84 NA NA
## s(time):aet 50.00 5.61 0.84 0.35
## s(time):q 30.00 5.81 0.84 0.36
## s(time):tmin 100.00 67.41 0.84 0.32
mgcv::plot.gam(
x = vivax_tvcm_3,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(vivax_tvcm_3)
## para s(district) s(time):aet s(time):q s(time):tmin
## worst 1 1.00000000 0.9817423 0.9070719 0.9913637
## observed 1 0.14292296 0.9584068 0.8097720 0.9400999
## estimate 1 0.04435857 0.9644319 0.8284583 0.9686361
mgcv::concurvity(vivax_tvcm_3, full = FALSE)$estimate
## para s(district) s(time):aet s(time):q s(time):tmin
## para 1.0000000 0.02040816 0.3311993 0.2369456 0.3336484
## s(district) 1.0000000 1.00000000 0.3323752 0.2634173 0.3395257
## s(time):aet 0.9668224 0.01992532 1.0000000 0.6665501 0.9386557
## s(time):q 0.6399314 0.01471070 0.5606461 1.0000000 0.7095498
## s(time):tmin 0.9692789 0.02029111 0.9515653 0.7818332 1.0000000
Predictors: aet, q, tmin
tictoc::tic("GAM model fitting")
falciparum_tvcm_3 <- mgcv::bam(
formula =
falciparum ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = q, bs = "tp", k = 30) +
s(time, by = tmin, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.64 sec elapsed
cat("\nConverged?", falciparum_tvcm_3$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_3)
##
## Family: Negative Binomial(0.502)
## Link function: log
##
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = aet, bs = "tp", k = 50) + s(time, by = q, bs = "tp",
## k = 30) + s(time, by = tmin, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.9864 0.4011 -19.91 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.581 48.000 188.647 <2e-16 ***
## s(time):aet 5.683 6.710 10.252 <2e-16 ***
## s(time):q 7.292 8.784 7.218 <2e-16 ***
## s(time):tmin 57.363 69.563 5.843 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.367 Deviance explained = 64.8%
## fREML = 13753 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_3, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 1.978898e-07 1.151730e-07 1.387608e-07 1.287021e-06
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 22.867144989 -0.007227575 -0.001749774 -0.08861918
## [2,] -0.007227575 1.111698746 -0.028527310 -0.06439048
## [3,] -0.001749774 -0.028527310 1.590586583 -0.17400596
## [4,] -0.088619177 -0.064390480 -0.174005959 13.33578457
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.58 NA NA
## s(time):aet 50.00 5.68 0.8 0.15
## s(time):q 30.00 7.29 0.8 0.12
## s(time):tmin 100.00 57.36 0.8 0.07 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mgcv::plot.gam(
x = falciparum_tvcm_3,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(falciparum_tvcm_3)
## para s(district) s(time):aet s(time):q s(time):tmin
## worst 1 1.00000000 0.9817423 0.9070719 0.9913637
## observed 1 0.10501527 0.9633845 0.8201042 0.9393871
## estimate 1 0.04435857 0.9643944 0.8283200 0.9685848
mgcv::concurvity(falciparum_tvcm_3, full = FALSE)$estimate
## para s(district) s(time):aet s(time):q s(time):tmin
## para 1.0000000 0.02040816 0.3296516 0.2358854 0.3316726
## s(district) 1.0000000 1.00000000 0.3308209 0.2622116 0.3375185
## s(time):aet 0.9668224 0.01992532 1.0000000 0.6665940 0.9386850
## s(time):q 0.6399314 0.01471070 0.5605220 1.0000000 0.7094785
## s(time):tmin 0.9692789 0.02029111 0.9515773 0.7818353 1.0000000
Predictors: aet, q, tmax
tictoc::tic("GAM model fitting")
vivax_tvcm_4 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = q, bs = "tp", k = 30) +
s(time, by = tmax, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.37 sec elapsed
cat("\nConverged?", vivax_tvcm_4$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_4)
##
## Family: Negative Binomial(0.871)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = aet, bs = "tp", k = 50) + s(time, by = q, bs = "tp",
## k = 30) + s(time, by = tmax, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.1403 0.3279 -18.73 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.830 48.000 275.840 <2e-16 ***
## s(time):aet 20.959 24.919 14.252 <2e-16 ***
## s(time):q 7.488 9.004 11.022 <2e-16 ***
## s(time):tmax 61.746 74.425 8.566 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.491 Deviance explained = 68.9%
## fREML = 14797 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_4, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 1.079556e-08 1.345414e-07 3.612030e-08 3.334112e-07
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 23.42193672 -0.02028744 -0.01269582 -0.2146663
## [2,] -0.02028744 3.17821799 0.08078295 0.7534201
## [3,] -0.01269582 0.08078295 1.40303917 -0.1287341
## [4,] -0.21466632 0.75342007 -0.12873408 15.7944463
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.83 NA NA
## s(time):aet 50.00 20.96 0.86 0.40
## s(time):q 30.00 7.49 0.86 0.42
## s(time):tmax 100.00 61.75 0.86 0.47
mgcv::plot.gam(
x = vivax_tvcm_4,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(vivax_tvcm_4)
## para s(district) s(time):aet s(time):q s(time):tmax
## worst 1 1.00000000 0.9842730 0.8891322 0.9943222
## observed 1 0.29327672 0.9598504 0.7724446 0.9759755
## estimate 1 0.04670974 0.9672774 0.7867258 0.9663222
mgcv::concurvity(vivax_tvcm_4, full = FALSE)$estimate
## para s(district) s(time):aet s(time):q s(time):tmax
## para 1.0000000 0.02040816 0.3307154 0.2365662 0.3288979
## s(district) 1.0000000 1.00000000 0.3318894 0.2629895 0.3382887
## s(time):aet 0.9668224 0.01992532 1.0000000 0.6665718 0.9495005
## s(time):q 0.6399314 0.01471070 0.5606215 1.0000000 0.5966078
## s(time):tmax 0.9593538 0.02031868 0.9563304 0.7073331 1.0000000
tictoc::tic("GAM model fitting")
falciparum_tvcm_4 <- mgcv::bam(
formula =
falciparum ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = q, bs = "tp", k = 30) +
s(time, by = tmax, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.25 sec elapsed
cat("\nConverged?", falciparum_tvcm_4$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_4)
##
## Family: Negative Binomial(0.511)
## Link function: log
##
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = aet, bs = "tp", k = 50) + s(time, by = q, bs = "tp",
## k = 30) + s(time, by = tmax, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.4326 0.3913 -19 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.546 49.00 169.362 <2e-16 ***
## s(time):aet 8.782 10.58 13.646 <2e-16 ***
## s(time):q 8.412 10.13 12.090 <2e-16 ***
## s(time):tmax 56.118 68.28 6.309 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.335 Deviance explained = 65.4%
## fREML = 13741 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_4, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 4.826851e-08 8.494352e-08 8.504216e-08 7.977093e-07
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 22.692965527 0.01110830 -0.005933296 -0.194848153
## [2,] 0.011108304 1.34112132 0.082482853 -0.113475094
## [3,] -0.005933296 0.08248285 1.760673570 -0.001891677
## [4,] -0.194848153 -0.11347509 -0.001891677 11.313262733
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.55 NA NA
## s(time):aet 50.00 8.78 0.82 0.30
## s(time):q 30.00 8.41 0.82 0.22
## s(time):tmax 100.00 56.12 0.82 0.30
mgcv::plot.gam(
x = falciparum_tvcm_4,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(falciparum_tvcm_4)
## para s(district) s(time):aet s(time):q s(time):tmax
## worst 1 1.00000000 0.9842730 0.8891322 0.9943222
## observed 1 0.30712502 0.9558048 0.7845443 0.9751917
## estimate 1 0.04670974 0.9672780 0.7866885 0.9663055
mgcv::concurvity(falciparum_tvcm_4, full = FALSE)$estimate
## para s(district) s(time):aet s(time):q s(time):tmax
## para 1.0000000 0.02040816 0.3306742 0.2367472 0.3284921
## s(district) 1.0000000 1.00000000 0.3318475 0.2631846 0.3378704
## s(time):aet 0.9668224 0.01992532 1.0000000 0.6665000 0.9495031
## s(time):q 0.6399314 0.01471070 0.5605250 1.0000000 0.5965507
## s(time):tmax 0.9593538 0.02031868 0.9563268 0.7072343 1.0000000
Predictors: aet, prcp, tmin
tictoc::tic("GAM model fitting")
vivax_tvcm_5_poiss <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = prcp, bs = "tp", k = 30) +
s(time, by = tmin, bs = "tp", k = 100),
family = poisson(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 1.62 sec elapsed
cat("\nConverged?", vivax_tvcm_5_poiss$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_5_poiss)
##
## Family: poisson
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp,
## bs = "tp", k = 30) + s(time, by = tmin, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.1645 0.3114 -23.01 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(district) 47.91 49.00 484098 <2e-16 ***
## s(time):aet 49.34 49.96 11120 <2e-16 ***
## s(time):prcp 29.71 29.99 5362 <2e-16 ***
## s(time):tmin 99.10 99.96 16965 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.597 Deviance explained = 74.6%
## fREML = 1.8194e+05 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_5_poiss, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 1.890420e-05 1.178532e-07 3.025197e-07 -2.230119e-08
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 23.591754270 -0.001002867 -0.001194892 -0.002640216
## [2,] -0.001002867 22.948752544 -0.076623413 -0.565119285
## [3,] -0.001194892 -0.076623413 13.053785783 -0.196850760
## [4,] -0.002640216 -0.565119285 -0.196850760 46.999138832
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.0 47.9 NA NA
## s(time):aet 50.0 49.3 0.99 0.66
## s(time):prcp 30.0 29.7 0.99 0.68
## s(time):tmin 100.0 99.1 0.99 0.69
sim_res_vivax_tvcm_5_poiss <- simulateResiduals(
fittedModel = vivax_tvcm_5_poiss,
integerResponse = TRUE
)
plot(sim_res_vivax_tvcm_5_poiss)
## DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details
testDispersion(sim_res_vivax_tvcm_5_poiss)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 117.02, p-value < 2.2e-16
## alternative hypothesis: two.sided
tictoc::tic("GAM model fitting")
vivax_tvcm_5 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = prcp, bs = "tp", k = 30) +
s(time, by = tmin, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.06 sec elapsed
cat("\nConverged?", vivax_tvcm_5$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_5)
##
## Family: Negative Binomial(0.84)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp,
## bs = "tp", k = 30) + s(time, by = tmin, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.6239 0.3372 -19.65 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.844 49.000 300.903 < 2e-16 ***
## s(time):aet 5.416 6.409 3.910 0.000509 ***
## s(time):prcp 5.516 6.620 4.696 6.76e-05 ***
## s(time):tmin 67.415 80.126 9.273 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.495 Deviance explained = 67.8%
## fREML = 14796 Scale est. = 1 n = 9996
sim_res_vivax_tvcm_5 <- simulateResiduals(
fittedModel = vivax_tvcm_5,
integerResponse = TRUE,
n = 1000
)
plot(sim_res_vivax_tvcm_5)
## DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details
testDispersion(sim_res_vivax_tvcm_5)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.5403, p-value < 2.2e-16
## alternative hypothesis: two.sided
mgcv::summary.gam(vivax_tvcm_5)$s.table
## edf Ref.df F p-value
## s(district) 47.844266 49.000000 300.902594 0.000000e+00
## s(time):aet 5.416405 6.408889 3.909630 5.088126e-04
## s(time):prcp 5.515848 6.619505 4.695992 6.762316e-05
## s(time):tmin 67.414970 80.126333 9.272846 0.000000e+00
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_5, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 4.727969e-09 2.539192e-08 1.453370e-08 1.072172e-07
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 23.51411546 -0.002954960 -0.003977930 -0.08878888
## [2,] -0.00295496 1.483205961 0.003150312 -0.18796755
## [3,] -0.00397793 0.003150312 1.024386783 0.09059968
## [4,] -0.08878888 -0.187967553 0.090599677 22.36730526
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.84 NA NA
## s(time):aet 50.00 5.42 0.87 0.48
## s(time):prcp 30.00 5.52 0.87 0.46
## s(time):tmin 100.00 67.41 0.87 0.44
mgcv::plot.gam(
x = vivax_tvcm_5,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(vivax_tvcm_5)
## para s(district) s(time):aet s(time):prcp s(time):tmin
## worst 1 1.00000000 0.9809642 0.9594399 0.9914194
## observed 1 0.14249995 0.9593331 0.8891144 0.9411815
## estimate 1 0.04423609 0.9626495 0.9002785 0.9691286
mgcv::concurvity(vivax_tvcm_5, full = FALSE)$estimate
## para s(district) s(time):aet s(time):prcp s(time):tmin
## para 1.0000000 0.02040816 0.3303029 0.2956675 0.3329571
## s(district) 1.0000000 1.00000000 0.3314755 0.3092617 0.3388226
## s(time):aet 0.9668224 0.01992532 1.0000000 0.8019846 0.9386692
## s(time):prcp 0.8070714 0.01738709 0.7473701 1.0000000 0.8520751
## s(time):tmin 0.9692789 0.02029111 0.9515784 0.8770551 1.0000000
tictoc::tic("GAM model fitting")
vivax_tvcm_5 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = prcp, bs = "tp", k = 30) +
s(time, by = tmin, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.02 sec elapsed
cat("\nConverged?", vivax_tvcm_5$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_5)
##
## Family: Negative Binomial(0.84)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp,
## bs = "tp", k = 30) + s(time, by = tmin, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.6239 0.3372 -19.65 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.844 49.000 300.903 < 2e-16 ***
## s(time):aet 5.416 6.409 3.910 0.000509 ***
## s(time):prcp 5.516 6.620 4.696 6.76e-05 ***
## s(time):tmin 67.415 80.126 9.273 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.495 Deviance explained = 67.8%
## fREML = 14796 Scale est. = 1 n = 9996
tictoc::tic("GAM model fitting")
falciparum_tvcm_5 <- mgcv::bam(
formula =
falciparum ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = prcp, bs = "tp", k = 30) +
s(time, by = tmin, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.04 sec elapsed
cat("\nConverged?", falciparum_tvcm_5$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_5)
##
## Family: Negative Binomial(0.502)
## Link function: log
##
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp,
## bs = "tp", k = 30) + s(time, by = tmin, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.9621 0.4015 -19.83 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.581 49.000 184.640 <2e-16 ***
## s(time):aet 5.284 6.218 10.061 <2e-16 ***
## s(time):prcp 7.065 8.515 6.817 <2e-16 ***
## s(time):tmin 57.415 69.616 5.800 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.37 Deviance explained = 64.8%
## fREML = 13751 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_5, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 3.410725e-08 2.814923e-08 4.201096e-08 3.672202e-07
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 22.866506243 -0.008333529 -0.002181398 -0.08758758
## [2,] -0.008333529 1.037326967 0.022318126 -0.12070199
## [3,] -0.002181398 0.022318126 1.485439464 -0.17431039
## [4,] -0.087587583 -0.120701995 -0.174310387 13.33736634
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.58 NA NA
## s(time):aet 50.00 5.28 0.83 0.54
## s(time):prcp 30.00 7.06 0.83 0.53
## s(time):tmin 100.00 57.41 0.83 0.53
mgcv::plot.gam(
x = falciparum_tvcm_5,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(falciparum_tvcm_5)
## para s(district) s(time):aet s(time):prcp s(time):tmin
## worst 1 1.00000000 0.9809642 0.9594399 0.9914194
## observed 1 0.10468955 0.9629736 0.8992429 0.9407605
## estimate 1 0.04423609 0.9626799 0.9002293 0.9691427
mgcv::concurvity(falciparum_tvcm_5, full = FALSE)$estimate
## para s(district) s(time):aet s(time):prcp s(time):tmin
## para 1.0000000 0.02040816 0.3318222 0.2976411 0.3338610
## s(district) 1.0000000 1.00000000 0.3330002 0.3113452 0.3397427
## s(time):aet 0.9668224 0.01992532 1.0000000 0.8017250 0.9386454
## s(time):prcp 0.8070714 0.01738709 0.7472135 1.0000000 0.8520028
## s(time):tmin 0.9692789 0.02029111 0.9515499 0.8768750 1.0000000
Predictors: aet, prcp,tmax
tictoc::tic("GAM model fitting")
vivax_tvcm_6 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = prcp, bs = "tp", k = 30) +
s(time, by = tmax, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.15 sec elapsed
cat("\nConverged?", vivax_tvcm_6$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_6)
##
## Family: Negative Binomial(0.871)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp,
## bs = "tp", k = 30) + s(time, by = tmax, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.1283 0.3284 -18.66 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.829 48.000 275.840 <2e-16 ***
## s(time):aet 20.876 24.824 12.555 <2e-16 ***
## s(time):prcp 7.422 8.921 11.181 <2e-16 ***
## s(time):tmax 61.736 74.415 8.656 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.49 Deviance explained = 68.9%
## fREML = 14796 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_6, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 5.493435e-08 4.307864e-07 1.125115e-07 1.064589e-06
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 23.42161501 -0.02076764 -0.01220772 -0.2133830
## [2,] -0.02076764 3.16185881 0.10053404 0.7462853
## [3,] -0.01220772 0.10053404 1.40780965 -0.1349497
## [4,] -0.21338302 0.74628531 -0.13494969 16.0251695
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.83 NA NA
## s(time):aet 50.00 20.88 0.87 0.78
## s(time):prcp 30.00 7.42 0.87 0.79
## s(time):tmax 100.00 61.74 0.87 0.84
mgcv::plot.gam(
x = vivax_tvcm_6,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(vivax_tvcm_6)
## para s(district) s(time):aet s(time):prcp s(time):tmax
## worst 1 1.00000000 0.9837593 0.9379012 0.9943577
## observed 1 0.29169948 0.9644273 0.8742674 0.9761808
## estimate 1 0.04660462 0.9691804 0.8762339 0.9666034
mgcv::concurvity(vivax_tvcm_6, full = FALSE)$estimate
## para s(district) s(time):aet s(time):prcp s(time):tmax
## para 1.0000000 0.02040816 0.3314476 0.2967447 0.3299469
## s(district) 1.0000000 1.00000000 0.3326250 0.3104059 0.3393724
## s(time):aet 0.9668224 0.01992532 1.0000000 0.8018809 0.9494858
## s(time):prcp 0.8070714 0.01738709 0.7473645 1.0000000 0.7652660
## s(time):tmax 0.9593538 0.02031868 0.9563235 0.8155483 1.0000000
tictoc::tic("GAM model fitting")
falciparum_tvcm_6 <- mgcv::bam(
formula =
falciparum ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = prcp, bs = "tp", k = 30) +
s(time, by = tmax, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.55 sec elapsed
cat("\nConverged?", falciparum_tvcm_6$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_6)
##
## Family: Negative Binomial(0.51)
## Link function: log
##
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = aet, bs = "tp", k = 50) + s(time, by = prcp,
## bs = "tp", k = 30) + s(time, by = tmax, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.4132 0.3923 -18.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.545 49.000 169.268 <2e-16 ***
## s(time):aet 8.169 9.826 11.746 <2e-16 ***
## s(time):prcp 8.717 10.508 11.394 <2e-16 ***
## s(time):tmax 56.527 68.730 6.357 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.336 Deviance explained = 65.4%
## fREML = 13741 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_6, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] -3.079698e-09 -5.460274e-09 -7.074117e-09 -5.653783e-08
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 22.691409665 0.007411369 -0.003187324 -0.18669615
## [2,] 0.007411369 1.255975890 0.180441245 -0.11942284
## [3,] -0.003187324 0.180441245 1.649816644 -0.08929099
## [4,] -0.186696151 -0.119422841 -0.089290993 11.38000552
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.55 NA NA
## s(time):aet 50.00 8.17 0.81 0.20
## s(time):prcp 30.00 8.72 0.81 0.22
## s(time):tmax 100.00 56.53 0.81 0.20
mgcv::plot.gam(
x = falciparum_tvcm_6,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(falciparum_tvcm_6)
## para s(district) s(time):aet s(time):prcp s(time):tmax
## worst 1 1.00000000 0.9837593 0.9379012 0.9943577
## observed 1 0.30596140 0.9614097 0.8819831 0.9754096
## estimate 1 0.04660462 0.9691827 0.8761934 0.9665812
mgcv::concurvity(falciparum_tvcm_6, full = FALSE)$estimate
## para s(district) s(time):aet s(time):prcp s(time):tmax
## para 1.0000000 0.02040816 0.3314522 0.2971256 0.3294596
## s(district) 1.0000000 1.00000000 0.3326289 0.3108008 0.3388704
## s(time):aet 0.9668224 0.01992532 1.0000000 0.8018003 0.9494860
## s(time):prcp 0.8070714 0.01738709 0.7472604 1.0000000 0.7652080
## s(time):tmax 0.9593538 0.02031868 0.9563178 0.8154353 1.0000000
Predictors: q, tmin
tictoc::tic("GAM model fitting")
vivax_tvcm_7 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = q, bs = "tp", k = 30) +
s(time, by = tmin, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 1.8 sec elapsed
cat("\nConverged?", vivax_tvcm_7$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_7)
##
## Family: Negative Binomial(0.837)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = q, bs = "tp", k = 30) + s(time, by = tmin, bs = "tp",
## k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.7473 0.3282 -20.56 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.845 49.000 301.485 < 2e-16 ***
## s(time):q 5.621 6.744 4.504 8.22e-05 ***
## s(time):tmin 69.242 81.867 17.853 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.491 Deviance explained = 67.7%
## fREML = 14790 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_7, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 1.735192e-09 9.644842e-09 1.175017e-07
##
## $hess
## [,1] [,2] [,3]
## [1,] 23.514687561 -0.002282676 -0.084396380
## [2,] -0.002282676 0.794182266 0.005866442
## [3,] -0.084396380 0.005866442 23.869537772
##
## Model rank = 180 / 180
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.84 NA NA
## s(time):q 30.00 5.62 0.87 0.32
## s(time):tmin 100.00 69.24 0.87 0.34
mgcv::plot.gam(
x = vivax_tvcm_7,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(vivax_tvcm_7)
## para s(district) s(time):q s(time):tmin
## worst 1 1.00000000 0.8907614 0.9887135
## observed 1 0.13089053 0.7936389 0.5441603
## estimate 1 0.03893871 0.8068839 0.7975255
mgcv::concurvity(vivax_tvcm_7, full = FALSE)$estimate
## para s(district) s(time):q s(time):tmin
## para 1.0000000 0.02040816 0.2328648 0.3273184
## s(district) 1.0000000 1.00000000 0.2588091 0.3330937
## s(time):q 0.6399314 0.01471070 1.0000000 0.7094052
## s(time):tmin 0.9692789 0.02029111 0.7819597 1.0000000
tictoc::tic("GAM model fitting")
falciparum_tvcm_7 <- mgcv::bam(
formula =
falciparum ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = q, bs = "tp", k = 30) +
s(time, by = tmin, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 1.77 sec elapsed
cat("\nConverged?", falciparum_tvcm_7$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_7)
##
## Family: Negative Binomial(0.497)
## Link function: log
##
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = q, bs = "tp", k = 30) + s(time, by = tmin, bs = "tp",
## k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.2864 0.3876 -21.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.581 48.000 187.383 <2e-16 ***
## s(time):q 7.353 8.859 7.328 <2e-16 ***
## s(time):tmin 58.272 70.475 11.305 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.362 Deviance explained = 64.5%
## fREML = 13748 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_7, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 3.487436e-07 1.036569e-07 8.872672e-07
##
## $hess
## [,1] [,2] [,3]
## [1,] 22.8647400856 -0.0009929983 -0.08624672
## [2,] -0.0009929983 1.4679490158 -0.42285662
## [3,] -0.0862467227 -0.4228566198 14.85758815
##
## Model rank = 180 / 180
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.58 NA NA
## s(time):q 30.00 7.35 0.8 0.31
## s(time):tmin 100.00 58.27 0.8 0.32
mgcv::plot.gam(
x = falciparum_tvcm_7,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(falciparum_tvcm_7)
## para s(district) s(time):q s(time):tmin
## worst 1 1.00000000 0.8907614 0.9887135
## observed 1 0.09446697 0.7922898 0.5988347
## estimate 1 0.03893871 0.8069318 0.7975795
mgcv::concurvity(falciparum_tvcm_7, full = FALSE)$estimate
## para s(district) s(time):q s(time):tmin
## para 1.0000000 0.02040816 0.2327271 0.3274677
## s(district) 1.0000000 1.00000000 0.2586449 0.3332449
## s(time):q 0.6399314 0.01471070 1.0000000 0.7094382
## s(time):tmin 0.9692789 0.02029111 0.7820277 1.0000000
Predictors: q, tmax
tictoc::tic("GAM model fitting")
vivax_tvcm_8 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = q, bs = "tp", k = 30) +
s(time, by = tmax, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 1.73 sec elapsed
cat("\nConverged?", vivax_tvcm_8$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_8)
##
## Family: Negative Binomial(0.835)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = q, bs = "tp", k = 30) + s(time, by = tmax, bs = "tp",
## k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.3144 0.3291 -19.19 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.833 48.00 268.49 <2e-16 ***
## s(time):q 9.554 11.53 10.24 <2e-16 ***
## s(time):tmax 66.884 79.60 15.70 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.486 Deviance explained = 67.7%
## fREML = 14787 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_8, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 7.924974e-08 1.441898e-07 1.283543e-06
##
## $hess
## [,1] [,2] [,3]
## [1,] 23.42977248 -0.01448436 -0.3194353
## [2,] -0.01448436 1.82053726 -0.2243699
## [3,] -0.31943527 -0.22436988 21.0173453
##
## Model rank = 180 / 180
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.83 NA NA
## s(time):q 30.00 9.55 0.86 0.14
## s(time):tmax 100.00 66.88 0.86 0.15
mgcv::plot.gam(
x = vivax_tvcm_8,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(vivax_tvcm_8)
## para s(district) s(time):q s(time):tmax
## worst 1 1.00000000 0.8760397 0.9914966
## observed 1 0.30259653 0.7419765 0.7641925
## estimate 1 0.04152774 0.7615721 0.7242683
mgcv::concurvity(vivax_tvcm_8, full = FALSE)$estimate
## para s(district) s(time):q s(time):tmax
## para 1.0000000 0.02040816 0.2337196 0.3242180
## s(district) 1.0000000 1.00000000 0.2597761 0.3334529
## s(time):q 0.6399314 0.01471070 1.0000000 0.5965156
## s(time):tmax 0.9593538 0.02031868 0.7074298 1.0000000
tictoc::tic("GAM model fitting")
falciparum_tvcm_8 <- mgcv::bam(
formula =
falciparum ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = q, bs = "tp", k = 30) +
s(time, by = tmax, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.1 sec elapsed
cat("\nConverged?", falciparum_tvcm_8$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_8)
##
## Family: Negative Binomial(0.498)
## Link function: log
##
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = q, bs = "tp", k = 30) + s(time, by = tmax, bs = "tp",
## k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.3781 0.3854 -19.14 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.544 48.00 170.31 <2e-16 ***
## s(time):q 8.954 10.79 11.10 <2e-16 ***
## s(time):tmax 54.214 66.05 12.09 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.36 Deviance explained = 64.6%
## fREML = 13735 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_8, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 3.402689e-08 2.992471e-08 2.725303e-07
##
## $hess
## [,1] [,2] [,3]
## [1,] 22.7011775413 -0.0002516576 -0.2702655
## [2,] -0.0002516576 1.7900699736 -0.1938459
## [3,] -0.2702654828 -0.1938458871 10.9574371
##
## Model rank = 180 / 180
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.54 NA NA
## s(time):q 30.00 8.95 0.85 0.96
## s(time):tmax 100.00 54.21 0.85 0.95
mgcv::plot.gam(
x = falciparum_tvcm_8,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(falciparum_tvcm_8)
## para s(district) s(time):q s(time):tmax
## worst 1 1.00000000 0.8760397 0.9914966
## observed 1 0.30375752 0.7739353 0.8719470
## estimate 1 0.04152774 0.7613646 0.7234999
mgcv::concurvity(falciparum_tvcm_8, full = FALSE)$estimate
## para s(district) s(time):q s(time):tmax
## para 1.0000000 0.02040816 0.2317413 0.3223674
## s(district) 1.0000000 1.00000000 0.2575195 0.3315370
## s(time):q 0.6399314 0.01471070 1.0000000 0.5965881
## s(time):tmax 0.9593538 0.02031868 0.7078390 1.0000000
Predictors: prcp, tmin
tictoc::tic("GAM model fitting")
vivax_tvcm_9 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = prcp, bs = "tp", k = 30) +
s(time, by = tmin, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 1.69 sec elapsed
cat("\nConverged?", vivax_tvcm_9$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_9)
##
## Family: Negative Binomial(0.837)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = prcp, bs = "tp", k = 30) + s(time, by = tmin,
## bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.7056 0.3289 -20.39 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.84 48.000 308.263 < 2e-16 ***
## s(time):prcp 5.65 6.787 4.975 2.26e-05 ***
## s(time):tmin 68.84 81.469 14.750 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.493 Deviance explained = 67.7%
## fREML = 14789 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_9, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 1.389883e-08 1.636942e-08 2.177923e-07
##
## $hess
## [,1] [,2] [,3]
## [1,] 23.515078667 -0.003024977 -0.07629346
## [2,] -0.003024977 1.088001137 0.10296537
## [3,] -0.076293460 0.102965374 23.71558050
##
## Model rank = 180 / 180
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.84 NA NA
## s(time):prcp 30.00 5.65 0.84 0.015 *
## s(time):tmin 100.00 68.84 0.84 0.025 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mgcv::plot.gam(
x = vivax_tvcm_9,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(vivax_tvcm_9)
## para s(district) s(time):prcp s(time):tmin
## worst 1 1.00000000 0.9495201 0.9886612
## observed 1 0.12663975 0.8834001 0.6545112
## estimate 1 0.03869671 0.8920174 0.8931749
mgcv::concurvity(vivax_tvcm_9, full = FALSE)$estimate
## para s(district) s(time):prcp s(time):tmin
## para 1.0000000 0.02040816 0.2971182 0.3327393
## s(district) 1.0000000 1.00000000 0.3107984 0.3386034
## s(time):prcp 0.8070714 0.01738709 1.0000000 0.8520021
## s(time):tmin 0.9692789 0.02029111 0.8768477 1.0000000
tictoc::tic("GAM model fitting")
falciparum_tvcm_9 <- mgcv::bam(
formula =
falciparum ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = prcp, bs = "tp", k = 30) +
s(time, by = tmin, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 1.78 sec elapsed
cat("\nConverged?", falciparum_tvcm_9$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_9)
##
## Family: Negative Binomial(0.496)
## Link function: log
##
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = prcp, bs = "tp", k = 30) + s(time, by = tmin,
## bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.1803 0.3887 -21.04 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.582 48.000 187.426 <2e-16 ***
## s(time):prcp 7.433 8.971 7.337 <2e-16 ***
## s(time):tmin 57.569 69.687 8.646 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.367 Deviance explained = 64.5%
## fREML = 13747 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_9, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 5.041353e-07 1.378462e-07 1.259055e-06
##
## $hess
## [,1] [,2] [,3]
## [1,] 22.865004984 -0.001410579 -0.08140897
## [2,] -0.001410579 1.659746451 -0.34199534
## [3,] -0.081408969 -0.341995341 14.33586510
##
## Model rank = 180 / 180
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.58 NA NA
## s(time):prcp 30.00 7.43 0.82 0.57
## s(time):tmin 100.00 57.57 0.82 0.54
mgcv::plot.gam(
x = falciparum_tvcm_9,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(falciparum_tvcm_9)
## para s(district) s(time):prcp s(time):tmin
## worst 1 1.00000000 0.9495201 0.9886612
## observed 1 0.09182003 0.8881393 0.7222467
## estimate 1 0.03869671 0.8919952 0.8926087
mgcv::concurvity(falciparum_tvcm_9, full = FALSE)$estimate
## para s(district) s(time):prcp s(time):tmin
## para 1.0000000 0.02040816 0.2925201 0.3285027
## s(district) 1.0000000 1.00000000 0.3059347 0.3342968
## s(time):prcp 0.8070714 0.01738709 1.0000000 0.8520700
## s(time):tmin 0.9692789 0.02029111 0.8771001 1.0000000
Predictors: prcp, tmax
tictoc::tic("GAM model fitting")
vivax_tvcm_10 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = prcp, bs = "tp", k = 30) +
s(time, by = tmax, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.08 sec elapsed
cat("\nConverged?", vivax_tvcm_10$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_10)
##
## Family: Negative Binomial(0.841)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = prcp, bs = "tp", k = 30) + s(time, by = tmax,
## bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.2996 0.3301 -19.08 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.83 48.00 269.75 <2e-16 ***
## s(time):prcp 11.68 14.08 12.81 <2e-16 ***
## s(time):tmax 67.03 79.72 12.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.483 Deviance explained = 67.9%
## fREML = 14788 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_10, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 9.338716e-09 2.790006e-08 1.704440e-07
##
## $hess
## [,1] [,2] [,3]
## [1,] 23.42986869 -0.01280447 -0.2989760
## [2,] -0.01280447 1.60238903 -0.3085009
## [3,] -0.29897601 -0.30850091 21.1112544
##
## Model rank = 180 / 180
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.0 47.8 NA NA
## s(time):prcp 30.0 11.7 0.88 0.71
## s(time):tmax 100.0 67.0 0.88 0.71
mgcv::plot.gam(
x = vivax_tvcm_10,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(vivax_tvcm_10)
## para s(district) s(time):prcp s(time):tmax
## worst 1 1.00000000 0.9230594 0.9915651
## observed 1 0.30114341 0.8477614 0.8215732
## estimate 1 0.04119987 0.8548854 0.8369062
mgcv::concurvity(vivax_tvcm_10, full = FALSE)$estimate
## para s(district) s(time):prcp s(time):tmax
## para 1.0000000 0.02040816 0.2970109 0.3284053
## s(district) 1.0000000 1.00000000 0.3106873 0.3377823
## s(time):prcp 0.8070714 0.01738709 1.0000000 0.7651786
## s(time):tmax 0.9593538 0.02031868 0.8152860 1.0000000
tictoc::tic("GAM model fitting")
falciparum_tvcm_10 <- mgcv::bam(
formula =
falciparum ~
offset(ln_pop2015) +
s(district, bs = "re", k = 49) +
s(time, by = prcp, bs = "tp", k = 30) +
s(time, by = tmax, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
## Warning in estimate.theta(theta, family, y, mu, scale = scale1, wt = G$w, : step
## failure in theta estimation
## Warning in estimate.theta(theta, family, y, mu, scale = scale1, wt = G$w, : step
## failure in theta estimation
tictoc::toc()
## GAM model fitting: 2.14 sec elapsed
cat("\nConverged?", falciparum_tvcm_10$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_10)
##
## Family: Negative Binomial(0.501)
## Link function: log
##
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re", k = 49) +
## s(time, by = prcp, bs = "tp", k = 30) + s(time, by = tmax,
## bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.3161 0.3871 -18.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.545 48.00 170.810 <2e-16 ***
## s(time):prcp 9.846 11.88 12.480 <2e-16 ***
## s(time):tmax 54.362 66.20 9.387 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.359 Deviance explained = 64.8%
## fREML = 13734 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_10, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 4.173538e-08 8.067642e-09 2.735690e-08
##
## $hess
## [,1] [,2] [,3]
## [1,] 22.701622809 0.004404026 -0.2479362
## [2,] 0.004404026 1.981886586 -0.2084873
## [3,] -0.247936218 -0.208487301 10.3205637
##
## Model rank = 180 / 180
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.54 NA NA
## s(time):prcp 30.00 9.85 0.81 0.26
## s(time):tmax 100.00 54.36 0.81 0.23
mgcv::plot.gam(
x = falciparum_tvcm_10,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(falciparum_tvcm_10)
## para s(district) s(time):prcp s(time):tmax
## worst 1 1.00000000 0.9230594 0.9915651
## observed 1 0.30310171 0.8717537 0.9064501
## estimate 1 0.04119987 0.8546375 0.8361792
mgcv::concurvity(falciparum_tvcm_10, full = FALSE)$estimate
## para s(district) s(time):prcp s(time):tmax
## para 1.0000000 0.02040816 0.2934553 0.3254020
## s(district) 1.0000000 1.00000000 0.3069260 0.3346757
## s(time):prcp 0.8070714 0.01738709 1.0000000 0.7652822
## s(time):tmax 0.9593538 0.02031868 0.8156583 1.0000000
AIC(
vivax_tvcm_0,
vivax_tvcm_1,
vivax_tvcm_2,
vivax_tvcm_3,
vivax_tvcm_4,
vivax_tvcm_5,
vivax_tvcm_6,
vivax_tvcm_7,
vivax_tvcm_8,
vivax_tvcm_9,
vivax_tvcm_10
) -> vivax_tvcm_aic
vivax_tvcms <- list(
vivax_tvcm_0,
vivax_tvcm_1,
vivax_tvcm_2,
vivax_tvcm_3,
vivax_tvcm_4,
vivax_tvcm_5,
vivax_tvcm_6,
vivax_tvcm_7,
vivax_tvcm_8,
vivax_tvcm_9,
vivax_tvcm_10
)
n_models <- length(vivax_tvcms)
vivax_tvcm_dev <- rep(0, n_models)
for(i in 1:n_models) {
vivax_tvcm_dev[i] <- deviance(vivax_tvcms[[i]])
}
eval_vivax_tvcm <- tibble::tibble(
model = row.names(vivax_tvcm_aic),
df = round(vivax_tvcm_aic$df, 2),
aic = round(vivax_tvcm_aic$AIC, 2),
deviance = round(vivax_tvcm_dev, 2)
)
eval_vivax_tvcm$aicDiff <- round(AIC(vivax_tvcm_0) - eval_vivax_tvcm$aic, 2)
eval_vivax_tvcm
## # A tibble: 11 x 5
## model df aic deviance aicDiff
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 vivax_tvcm_0 161. 70161. 10410. 0
## 2 vivax_tvcm_1 159. 70159. 10410. 1.69
## 3 vivax_tvcm_2 159. 70161. 10411. -0.24
## 4 vivax_tvcm_3 131. 70780. 10491. -619.
## 5 vivax_tvcm_4 143. 70476. 10442. -315.
## 6 vivax_tvcm_5 130. 70783. 10491. -622.
## 7 vivax_tvcm_6 143. 70475. 10442. -314.
## 8 vivax_tvcm_7 127. 70798. 10487. -637.
## 9 vivax_tvcm_8 128. 70809. 10479. -648.
## 10 vivax_tvcm_9 126. 70797. 10488. -636.
## 11 vivax_tvcm_10 131. 70752. 10470. -591.
# readr::write_csv(eval_vivax_tvcm, "eval_vivax_tvcm.csv")
AIC(
falciparum_tvcm_0,
falciparum_tvcm_1,
falciparum_tvcm_2,
falciparum_tvcm_3,
falciparum_tvcm_4,
falciparum_tvcm_5,
falciparum_tvcm_6,
falciparum_tvcm_7,
falciparum_tvcm_8,
falciparum_tvcm_9,
falciparum_tvcm_10
) -> falciparum_tvcm_aic
falciparum_tvcms <- list(
falciparum_tvcm_0,
falciparum_tvcm_1,
falciparum_tvcm_2,
falciparum_tvcm_3,
falciparum_tvcm_4,
falciparum_tvcm_5,
falciparum_tvcm_6,
falciparum_tvcm_7,
falciparum_tvcm_8,
falciparum_tvcm_9,
falciparum_tvcm_10
)
n_models <- length(falciparum_tvcms)
falciparum_tvcm_dev <- rep(0, n_models)
for(i in 1:n_models) {
falciparum_tvcm_dev[i] <- deviance(falciparum_tvcms[[i]])
}
eval_falciparum_tvcm <- tibble::tibble(
model = row.names(falciparum_tvcm_aic),
df = round(falciparum_tvcm_aic$df, 2),
aic = round(falciparum_tvcm_aic$AIC, 2),
deviance = round(falciparum_tvcm_dev, 2)
)
eval_falciparum_tvcm$aicDiff <- round(AIC(falciparum_tvcm_0) - eval_falciparum_tvcm$aic, 2)
eval_falciparum_tvcm
## # A tibble: 11 x 5
## model df aic deviance aicDiff
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 falciparum_tvcm_0 150. 45901. 8423. 0
## 2 falciparum_tvcm_1 148. 45902. 8425. -1.43
## 3 falciparum_tvcm_2 148. 45905. 8425. -4.51
## 4 falciparum_tvcm_3 123. 46208. 8486. -307.
## 5 falciparum_tvcm_4 126. 46088. 8454. -187.
## 6 falciparum_tvcm_5 122. 46214. 8486. -313.
## 7 falciparum_tvcm_6 126. 46091. 8454. -190.
## 8 falciparum_tvcm_7 117. 46269. 8492. -368.
## 9 falciparum_tvcm_8 115. 46234. 8484. -333.
## 10 falciparum_tvcm_9 117. 46270. 8492. -370.
## 11 falciparum_tvcm_10 116. 46206. 8477. -305.
# readr::write_csv(eval_falciparum_tvcm, "eval_falciparum_tvcm.csv")
df_pamafro_period <- tibble::tibble(
start = lubridate::make_datetime(year = 2005, month = 10, day = 1L),
end = lubridate::make_datetime(year = 2010, month = 09, day = 1L)
)
tvcm_5_terms <- c(
"Actual Evapotranspiration",
"Precipitation",
"Min Temperature"
)
get_tvcoef <- function(plot_vivax, plot_falciparum, labels) {
nplots_vivax <- length(plot_vivax)
nplots_falciparum <- length(plot_falciparum)
assertthat::are_equal(nplots_vivax, nplots_falciparum)
nplots <- nplots_vivax
tvcoef_vivax <- vector(mode = "list", length = nplots - 1)
for (i in 2:nplots) {
tvcoef_vivax[[(i-1)]] <-
tibble::tibble(
x = lubridate::as_datetime(plot_vivax[[i]]$x),
se = plot_vivax[[i]]$se,
fit = plot_vivax[[i]]$fit,
term = labels[(i-1)],
specie = "vivax"
)
}
df_tvcoef_vivax <- dplyr::bind_rows(tvcoef_vivax)
tvcoef_falciparum <- vector(mode = "list", length = nplots - 1)
for (i in 2:nplots) {
tvcoef_falciparum[[(i-1)]] <-
tibble::tibble(
x = lubridate::as_datetime(plot_falciparum[[i]]$x),
se = plot_falciparum[[i]]$se,
fit = plot_falciparum[[i]]$fit,
term = labels[(i-1)],
specie = "falciparum"
)
}
df_tvcoef_falciparum <- dplyr::bind_rows(tvcoef_falciparum)
df_tvcoef <- dplyr::bind_rows(df_tvcoef_vivax, df_tvcoef_falciparum)
df_tvcoef$term <- factor(df_tvcoef$term, labels)
df_tvcoef$specie <- factor(
x = df_tvcoef$specie,
levels = c("vivax", "falciparum"),
labels = c("P. vivax", "P. falciparum")
)
df_tvcoef
}
plot_tvcm_by_terms <- function(df_tvcm) {
p <- ggplot(df_tvcm)
# Add grid
p <- p + facet_wrap(~ term, ncol = 1, scales = "free", strip = "top")
# Add lines
p <- p + geom_line(mapping = aes(x = x, y = fit, color = specie), size = 1)
# Add confidence intervals
p <- p + geom_ribbon(
mapping = aes(
x = x,
y = fit,
ymin = fit + se,
ymax = fit - se,
fill = specie
),
alpha = 0.1
)
# Add PAMAFRO's period layer
p <- p + geom_rect(
data = df_pamafro_period,
mapping = aes(xmin = start, xmax = end),
ymin = -100,
ymax = 100,
alpha = 0.2
)
# Add PAMAFRO's period lines
p <- p + geom_vline(
xintercept = c(df_pamafro_period$start, df_pamafro_period$end),
linetype = "dashed",
alpha = 0.6
)
# Add y = 0 line
p <- p + geom_hline(mapping = aes(yintercept = 0), alpha = 0.6)
# Scale date
p <- p + scale_x_datetime(date_labels = "%Y", date_breaks = "1 year")
# Add labels
p <- p + labs(y = "b(t)", x = NULL)
# Format layer
p <- p + theme(legend.position = "top", legend.title = element_blank())
show(p)
}
df_tvcm_5<- get_tvcoef(
plot_vivax_tvcm_5,
plot_falciparum_tvcm_5,
tvcm_5_terms
)
plot_tvcm_by_terms(df_tvcm_5)
get_period <- function(x) {
ifelse(
x < df_pamafro_period$start,
"before",
ifelse(
x > df_pamafro_period$end,
"after",
"during"
)
)
}
get_ci <- function(x) {
sprintf("[%s, %s]", round(x[1] - x[2], 2), round(x[1] + x[2], 2))
}
table_02_sketch = htmltools::withTags(
table(
class = "display",
thead(
tr(
th(rowspan = 3, "Climate variable"),
th(colspan = 4, "Before"),
th(colspan = 4, "During"),
th(colspan = 4, "After")
),
tr(
lapply(rep(c("Min", "Max"), 3), th, colspan = 2)
),
tr(
lapply(rep(c("Estimate", "95% CI"), 6), th)
)
)
)
)
df_tvcm_5$period <- apply(df_tvcm_5[, c("x")], 1, get_period)
df_tvcm_5$period <- factor(
x = df_tvcm_5$period,
levels = c("before", "during", "after")
)
df_tvcm_5 %>%
dplyr::group_by(term, period, specie) %>%
dplyr::slice_max(fit, n = 1) -> df_tvcm_5_max
df_tvcm_5 %>%
dplyr::group_by(term, period, specie) %>%
dplyr::slice_min(fit, n = 1) -> df_tvcm_5_min
df_tvcm_5_max$extrema <- rep("max", length(df_tvcm_5_max$fit))
df_tvcm_5_min$extrema <- rep("min", length(df_tvcm_5_min$fit))
df_tvcm_5_extrema <- dplyr::bind_rows(df_tvcm_5_max, df_tvcm_5_min)
df_tvcm_5_extrema$ci <- apply(df_tvcm_5_extrema[, c("fit", "se")], 1, get_ci)
df_tvcm_5_extrema %>%
dplyr::filter(specie == "P. vivax") %>%
dplyr::select(term, period, extrema, fit, ci) %>%
dplyr::mutate(fit = round(fit, 4)) %>%
tidyr::pivot_wider(
id_cols = term,
names_from = c(period, extrema),
values_from = c(fit, ci),
names_sep = "_"
) %>%
dplyr::select(
term,
fit_before_min,
ci_before_min,
fit_before_max,
ci_before_max,
fit_during_min,
ci_during_min,
fit_during_max,
ci_during_max,
fit_after_min,
ci_after_min,
fit_after_max,
ci_after_max
) -> df_tvcm_5_extrema_vivax
df_tvcm_5_extrema_vivax %>%
tibble::column_to_rownames(var = "term") %>%
DT::datatable(
container = table_02_sketch,
extensions = 'Buttons',
options = list(
dom = "Bt",
scrollX = TRUE,
columnDefs = list(list(className = 'dt-center', targets = 6)),
buttons = c('copy', 'csv')
),
caption = "P. vivax"
)
df_tvcm_5_extrema %>%
dplyr::filter(specie == "P. falciparum") %>%
dplyr::select(term, period, extrema, fit, ci) %>%
dplyr::mutate(fit = round(fit, 4)) %>%
tidyr::pivot_wider(
id_cols = term,
names_from = c(period, extrema),
values_from = c(fit, ci),
names_sep = "_"
) %>%
dplyr::select(
term,
fit_before_min,
ci_before_min,
fit_before_max,
ci_before_max,
fit_during_min,
ci_during_min,
fit_during_max,
ci_during_max,
fit_after_min,
ci_after_min,
fit_after_max,
ci_after_max
) -> df_tvcm_5_extrema_falciparum
df_tvcm_5_extrema_falciparum %>%
tibble::column_to_rownames(var = "term") %>%
DT::datatable(
container = table_02_sketch,
extensions = 'Buttons',
options = list(
dom = "Bt",
scrollX = TRUE,
columnDefs = list(list(className = 'dt-center', targets = 6)),
buttons = c('copy', 'csv')
),
caption = "P. falciparum"
)
df_tvcm_5 %>%
dplyr::group_by(period, specie, term) %>%
#dplyr::arrange(x) %>%
dplyr::filter(
fit == min(fit) | fit == max(fit)
)
## # A tibble: 36 x 6
## # Groups: period, specie, term [18]
## x se fit[,1] term specie period
## <dttm> <dbl> <dbl> <fct> <fct> <fct>
## 1 2001-01-01 00:00:00 0.939 -0.883 Actual Evapotranspiration P. vivax before
## 2 2005-09-30 07:21:31 0.445 0.310 Actual Evapotranspiration P. vivax before
## 3 2005-12-25 23:19:35 0.446 0.314 Actual Evapotranspiration P. vivax during
## 4 2010-08-30 12:24:31 0.482 -0.623 Actual Evapotranspiration P. vivax during
## 5 2012-11-24 15:32:06 0.492 -0.936 Actual Evapotranspiration P. vivax after
## 6 2017-12-01 00:00:00 0.787 0.657 Actual Evapotranspiration P. vivax after
## 7 2001-01-01 00:00:00 0.979 -1.90 Precipitation P. vivax before
## 8 2005-09-30 07:21:31 0.356 -0.0146 Precipitation P. vivax before
## 9 2006-06-17 07:15:45 0.350 0.0484 Precipitation P. vivax during
## 10 2010-08-30 12:24:31 0.635 -0.867 Precipitation P. vivax during
## # ... with 26 more rows
The estimated time-varying coefficients of the climate variables for both models, as well as their approximated 95% confidence interval in each time point on the graph, are shown in Figure 2. As mentioned before, this time-varying coefficients are estimated as smooth functions over time. Although the confidence intervals include the zero most of the time in each case, the global contribution of the estimated smooth functions in the models is significant according to the approximated p-values in the summary table of the fitted model (APPENDIX). Something to highlight is the apparent concurrent drop to a more negative linear contribution of the climate variables at the end of the PAMAFRO intervention. The estimated confidence intervals for the coefficients at the end and some time after the PAMAFRO period are also farther away from zero than anytime in the study. This is more evident in the case of minimum temperature, where a significant drop is visible, for both P. vivax and P. falciparum. In the case of precipitation, this pattern is less evident for P.vivax, and for P. falciparum there seems to be a periodicity in this drop down which casually matches with the end of PAMAFRO intervention.
Maximum and minimum values of the estimated time-varying coefficients for each model before, during, and after the PAMAFRO intervention, including their 95% confidence intervals, are presented in Table 2. Before the intervention
P. vivax
Use max and min
Before PAMAFRO, the actual evapotranspiration effect’s had a change of direction from being negative to being positive, going from -0.93 (95% CI: []) at the begining of this period to 0.24 at the end, which means a 74% decrease in the effect’s magnitude. During PAMAFRO, an opposite change of direction occurred, going from 0.24 to -0.67, which means a 177% increase in the effect’s magnitude.
After PAMAFRO, a change in direction in the effect similar to that of the first period happened, going from -0.68 to 0.63, which means a 7% decrease in the effect’s magnitude.
Before PAMAFRO, the precipitation had a negative effect throughout this period, but its magnitude decreased by 98%, going from -1.81 (CI) to -0.04 (CI).
During PAMAFRO, the precipitation continued to have a negative effect on the incidence, but it increased its magnitude 17 times, going from -0.03 (CI) to -0.53 (CI).
After PAMAFRO, a change in direction in the effect similar to that of the first period happened, going from -0.68 to 0.63, which means a 7% decrease in the effect’s magnitude.
tictoc::tic("GAM model fitting")
vivax_tvc_dlm_5 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re") +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = aet_lag1, bs = "tp", k = 50) +
s(time, by = aet_lag3, bs = "tp", k = 50) +
s(time, by = aet_lag6, bs = "tp", k = 50) +
s(time, by = aet_lag12, bs = "tp", k = 50) +
s(time, by = prcp, bs = "tp", k = 30) +
s(time, by = prcp_lag1, bs = "tp", k = 30) +
s(time, by = prcp_lag3, bs = "tp", k = 30) +
s(time, by = prcp_lag6, bs = "tp", k = 30) +
s(time, by = prcp_lag12, bs = "tp", k = 30) +
s(time, by = tmin, bs = "tp", k = 100) +
s(time, by = tmin_lag1, bs = "tp", k = 100) +
s(time, by = tmin_lag3, bs = "tp", k = 100) +
s(time, by = tmin_lag6, bs = "tp", k = 100) +
s(time, by = tmin_lag12, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 18.39 sec elapsed
cat("\nConverged?", vivax_tvc_dlm_5$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvc_dlm_5)
##
## Family: Negative Binomial(0.87)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re") + s(time,
## by = aet, bs = "tp", k = 50) + s(time, by = aet_lag1, bs = "tp",
## k = 50) + s(time, by = aet_lag3, bs = "tp", k = 50) + s(time,
## by = aet_lag6, bs = "tp", k = 50) + s(time, by = aet_lag12,
## bs = "tp", k = 50) + s(time, by = prcp, bs = "tp", k = 30) +
## s(time, by = prcp_lag1, bs = "tp", k = 30) + s(time, by = prcp_lag3,
## bs = "tp", k = 30) + s(time, by = prcp_lag6, bs = "tp", k = 30) +
## s(time, by = prcp_lag12, bs = "tp", k = 30) + s(time, by = tmin,
## bs = "tp", k = 100) + s(time, by = tmin_lag1, bs = "tp",
## k = 100) + s(time, by = tmin_lag3, bs = "tp", k = 100) +
## s(time, by = tmin_lag6, bs = "tp", k = 100) + s(time, by = tmin_lag12,
## bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.0038 0.4249 -16.48 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.832 48.000 292.630 < 2e-16 ***
## s(time):aet 2.904 3.317 0.329 0.74052
## s(time):aet_lag1 3.765 4.363 2.410 0.03826 *
## s(time):aet_lag3 8.352 9.868 3.980 2.02e-05 ***
## s(time):aet_lag6 14.249 16.961 4.343 < 2e-16 ***
## s(time):aet_lag12 3.397 3.904 3.431 0.00808 **
## s(time):prcp 6.067 7.276 2.956 0.00448 **
## s(time):prcp_lag1 5.134 6.138 5.504 9.58e-06 ***
## s(time):prcp_lag3 3.553 4.158 2.253 0.06032 .
## s(time):prcp_lag6 2.217 2.387 2.284 0.06635 .
## s(time):prcp_lag12 2.000 2.000 3.452 0.03172 *
## s(time):tmin 2.000 2.001 2.107 0.12162
## s(time):tmin_lag1 7.180 8.562 5.319 < 2e-16 ***
## s(time):tmin_lag3 2.000 2.000 4.501 0.01112 *
## s(time):tmin_lag6 49.890 61.147 3.579 < 2e-16 ***
## s(time):tmin_lag12 4.317 5.071 2.763 0.01687 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.492 Deviance explained = 68.7%
## fREML = 14848 Scale est. = 1 n = 9996
tictoc::tic("GAM model fitting")
falciparum_tvc_dlm_5 <- mgcv::bam(
formula =
falciparum ~
offset(ln_pop2015) +
s(district, bs = "re") +
s(time, by = aet, bs = "tp", k = 50) +
s(time, by = aet_lag1, bs = "tp", k = 50) +
s(time, by = aet_lag3, bs = "tp", k = 50) +
s(time, by = aet_lag6, bs = "tp", k = 50) +
s(time, by = aet_lag12, bs = "tp", k = 50) +
s(time, by = prcp, bs = "tp", k = 30) +
s(time, by = prcp_lag1, bs = "tp", k = 30) +
s(time, by = prcp_lag3, bs = "tp", k = 30) +
s(time, by = prcp_lag6, bs = "tp", k = 30) +
s(time, by = prcp_lag12, bs = "tp", k = 30) +
s(time, by = tmin, bs = "tp", k = 100) +
s(time, by = tmin_lag1, bs = "tp", k = 100) +
s(time, by = tmin_lag3, bs = "tp", k = 100) +
s(time, by = tmin_lag6, bs = "tp", k = 100) +
s(time, by = tmin_lag12, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 17.29 sec elapsed
cat("\nConverged?", falciparum_tvc_dlm_5$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(falciparum_tvc_dlm_5)
##
## Family: Negative Binomial(0.525)
## Link function: log
##
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re") + s(time,
## by = aet, bs = "tp", k = 50) + s(time, by = aet_lag1, bs = "tp",
## k = 50) + s(time, by = aet_lag3, bs = "tp", k = 50) + s(time,
## by = aet_lag6, bs = "tp", k = 50) + s(time, by = aet_lag12,
## bs = "tp", k = 50) + s(time, by = prcp, bs = "tp", k = 30) +
## s(time, by = prcp_lag1, bs = "tp", k = 30) + s(time, by = prcp_lag3,
## bs = "tp", k = 30) + s(time, by = prcp_lag6, bs = "tp", k = 30) +
## s(time, by = prcp_lag12, bs = "tp", k = 30) + s(time, by = tmin,
## bs = "tp", k = 100) + s(time, by = tmin_lag1, bs = "tp",
## k = 100) + s(time, by = tmin_lag3, bs = "tp", k = 100) +
## s(time, by = tmin_lag6, bs = "tp", k = 100) + s(time, by = tmin_lag12,
## bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.774 0.529 -14.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.568 48.000 186.801 < 2e-16 ***
## s(time):aet 5.578 6.630 1.864 0.076865 .
## s(time):aet_lag1 10.996 13.021 2.874 0.000367 ***
## s(time):aet_lag3 3.130 3.632 25.079 < 2e-16 ***
## s(time):aet_lag6 9.631 11.417 8.143 < 2e-16 ***
## s(time):aet_lag12 2.000 2.000 17.044 < 2e-16 ***
## s(time):prcp 3.095 3.606 3.463 0.007761 **
## s(time):prcp_lag1 4.278 5.094 5.974 1.45e-05 ***
## s(time):prcp_lag3 2.580 2.936 2.592 0.066192 .
## s(time):prcp_lag6 2.000 2.000 3.654 0.025928 *
## s(time):prcp_lag12 7.478 8.984 2.352 0.012928 *
## s(time):tmin 2.001 2.001 0.274 0.760360
## s(time):tmin_lag1 7.726 9.224 5.876 < 2e-16 ***
## s(time):tmin_lag3 2.000 2.000 5.857 0.002870 **
## s(time):tmin_lag6 15.847 18.856 4.286 < 2e-16 ***
## s(time):tmin_lag12 10.520 12.566 2.848 0.000524 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.382 Deviance explained = 66.1%
## fREML = 13735 Scale est. = 1 n = 9996
tvc_dlm_5_terms <- c(
"Actual Evapotranspiration (lag = 0)",
"Actual Evapotranspiration (lag = 1)",
"Actual Evapotranspiration (lag = 3)",
"Actual Evapotranspiration (lag = 6)",
"Actual Evapotranspiration (lag = 12)",
"Precipitation (lag = 0)",
"Precipitation (lag = 1)",
"Precipitation (lag = 3)",
"Precipitation (lag = 6)",
"Precipitation (lag = 12)",
"Min Temperature (lag = 0)",
"Min Temperature (lag = 1)",
"Min Temperature (lag = 3)",
"Min Temperature (lag = 6)",
"Min Temperature (lag = 12)"
)
df_tvc_dlm_5 <- get_tvcoef(
plot_vivax_tvc_dlm_5,
plot_falciparum_tvc_dlm_5,
tvc_dlm_5_terms
)
df_tvc_dlm_5
## # A tibble: 15,000 x 5
## x se fit[,1] term specie
## <dttm> <dbl> <dbl> <fct> <fct>
## 1 2001-01-01 00:00:00 0.859 0.0203 Actual Evapotranspiration (lag = 0) P. viv~
## 2 2001-01-13 09:08:17 0.853 0.0203 Actual Evapotranspiration (lag = 0) P. viv~
## 3 2001-01-25 18:16:35 0.847 0.0204 Actual Evapotranspiration (lag = 0) P. viv~
## 4 2001-02-07 03:24:53 0.841 0.0204 Actual Evapotranspiration (lag = 0) P. viv~
## 5 2001-02-19 12:33:11 0.835 0.0205 Actual Evapotranspiration (lag = 0) P. viv~
## 6 2001-03-03 21:41:28 0.829 0.0205 Actual Evapotranspiration (lag = 0) P. viv~
## 7 2001-03-16 06:49:46 0.823 0.0205 Actual Evapotranspiration (lag = 0) P. viv~
## 8 2001-03-28 15:58:04 0.817 0.0206 Actual Evapotranspiration (lag = 0) P. viv~
## 9 2001-04-10 01:06:22 0.811 0.0206 Actual Evapotranspiration (lag = 0) P. viv~
## 10 2001-04-22 10:14:40 0.805 0.0207 Actual Evapotranspiration (lag = 0) P. viv~
## # ... with 14,990 more rows
df_tvc_dlm_5$variable <- factor(
stringr::str_extract(df_tvc_dlm_5$term, ".*(?=\\s\\()"),
levels = c("Actual Evapotranspiration", "Precipitation", "Min Temperature")
)
df_tvc_dlm_5$lag <- factor(
stringr::str_extract(df_tvc_dlm_5$term, "(?<=\\().*(?=\\))"),
levels = c("lag = 0", "lag = 1", "lag = 3", "lag = 6", "lag = 12")
)
df_tvc_dlm_5 %>%
ggplot2::ggplot() +
facet_grid(cols = vars(specie), rows = vars(variable), scales = "free") +
geom_line(mapping = aes(x = x, y = fit, color = lag), size = 1) +
geom_ribbon(
mapping = aes(
x = x,
y = fit,
ymin = fit + se,
ymax = fit - se,
fill = lag
),
alpha = 0.1
) +
geom_rect(
data = df_pamafro_period,
mapping = aes(xmin = start, xmax = end),
ymin = -60,
ymax = 60,
alpha = 0.2
) +
geom_hline(mapping = aes(yintercept = 0), alpha = 0.6) +
geom_vline(
xintercept = c(df_pamafro_period$start, df_pamafro_period$end),
linetype = 'dashed',
alpha = 0.8
) +
scale_x_datetime(date_labels = "%Y", date_breaks = "2 year") +
ggsci::scale_color_npg() +
ggsci::scale_fill_npg() +
ylab("b(t)") +
xlab(NULL) +
theme(legend.position = "top", legend.title = element_blank())
tictoc::tic("GAM model fitting")
vivax_tvcm_5_lag1 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re") +
s(time, by = aet_lag1, bs = "tp", k = 50) +
s(time, by = prcp_lag1, bs = "tp", k = 30) +
s(time, by = tmin_lag1, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.06 sec elapsed
cat("\nConverged?", vivax_tvcm_5_lag1$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_5_lag1)
##
## Family: Negative Binomial(0.843)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re") + s(time,
## by = aet_lag1, bs = "tp", k = 50) + s(time, by = prcp_lag1,
## bs = "tp", k = 30) + s(time, by = tmin_lag1, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.0157 0.3356 -20.91 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.842 48.000 307.923 < 2e-16 ***
## s(time):aet_lag1 6.578 7.827 6.211 < 2e-16 ***
## s(time):prcp_lag1 2.001 2.002 11.054 1.6e-05 ***
## s(time):tmin_lag1 67.927 80.705 8.719 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.492 Deviance explained = 67.9%
## fREML = 14795 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_5_lag1, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 3.310003e-09 2.864826e-08 -2.549034e-05 3.080321e-08
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 2.351599e+01 -5.844994e-03 -2.001069e-06 -1.864916e-01
## [2,] -5.844994e-03 1.244763e+00 3.129586e-06 -2.405453e-01
## [3,] -2.001069e-06 3.129586e-06 2.523820e-05 3.656213e-05
## [4,] -1.864916e-01 -2.405453e-01 3.656213e-05 1.947748e+01
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.84 NA NA
## s(time):aet_lag1 50.00 6.58 0.89 0.92
## s(time):prcp_lag1 30.00 2.00 0.89 0.92
## s(time):tmin_lag1 100.00 67.93 0.89 0.92
mgcv::plot.gam(
x = vivax_tvcm_5_lag1,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(vivax_tvcm_5_lag1)
## para s(district) s(time):aet_lag1 s(time):prcp_lag1 s(time):tmin_lag1
## worst 1 1.00000000 0.9810728 0.9745491 0.9911320
## observed 1 0.13494760 0.9586554 0.8836913 0.9349924
## estimate 1 0.04423898 0.9619496 0.8996287 0.9685064
mgcv::concurvity(vivax_tvcm_5_lag1, full = FALSE)$estimate
## para s(district) s(time):aet_lag1 s(time):prcp_lag1
## para 1.0000000 0.02040816 0.3300338 0.2947308
## s(district) 1.0000000 1.00000000 0.3311771 0.3075222
## s(time):aet_lag1 0.9659814 0.01989962 1.0000000 0.8075944
## s(time):prcp_lag1 0.8066758 0.01736603 0.7405954 1.0000000
## s(time):tmin_lag1 0.9689610 0.02028509 0.9511616 0.8776304
## s(time):tmin_lag1
## para 0.3318437
## s(district) 0.3377886
## s(time):aet_lag1 0.9379776
## s(time):prcp_lag1 0.8498165
## s(time):tmin_lag1 1.0000000
tictoc::tic("GAM model fitting")
falciparum_tvcm_5_lag1 <- mgcv::bam(
formula =
falciparum ~
offset(ln_pop2015) +
s(district, bs = "re") +
s(time, by = aet_lag1, bs = "tp", k = 50) +
s(time, by = prcp_lag1, bs = "tp", k = 30) +
s(time, by = tmin_lag1, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.25 sec elapsed
cat("\nConverged?", falciparum_tvcm_5_lag1$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_5_lag1)
##
## Family: Negative Binomial(0.505)
## Link function: log
##
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re") + s(time,
## by = aet_lag1, bs = "tp", k = 50) + s(time, by = prcp_lag1,
## bs = "tp", k = 30) + s(time, by = tmin_lag1, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.3965 0.4008 -20.95 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.58 49.000 184.966 <2e-16 ***
## s(time):aet_lag1 6.50 7.681 11.307 <2e-16 ***
## s(time):prcp_lag1 8.31 10.032 6.709 <2e-16 ***
## s(time):tmin_lag1 57.11 69.304 5.287 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.353 Deviance explained = 65%
## fREML = 13756 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_5_lag1, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] -1.193661e-06 -6.655265e-08 -8.424542e-08 -6.779682e-07
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 22.881522614 -0.009027759 0.00271401 -0.1606189
## [2,] -0.009027759 0.966148434 0.07169012 0.1547939
## [3,] 0.002714010 0.071690120 1.45472250 -0.3079749
## [4,] -0.160618933 0.154793945 -0.30797488 11.4015357
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.58 NA NA
## s(time):aet_lag1 50.00 6.50 0.84 0.59
## s(time):prcp_lag1 30.00 8.31 0.84 0.61
## s(time):tmin_lag1 100.00 57.11 0.84 0.64
mgcv::plot.gam(
x = falciparum_tvcm_5_lag1,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(falciparum_tvcm_5_lag1)
## para s(district) s(time):aet_lag1 s(time):prcp_lag1 s(time):tmin_lag1
## worst 1 1.00000000 0.9810728 0.9745491 0.9911320
## observed 1 0.09841707 0.9590690 0.9045041 0.9505211
## estimate 1 0.04423898 0.9619717 0.8995819 0.9685086
mgcv::concurvity(falciparum_tvcm_5_lag1, full = FALSE)$estimate
## para s(district) s(time):aet_lag1 s(time):prcp_lag1
## para 1.0000000 0.02040816 0.3310103 0.2961423
## s(district) 1.0000000 1.00000000 0.3321567 0.3090055
## s(time):aet_lag1 0.9659814 0.01989962 1.0000000 0.8073822
## s(time):prcp_lag1 0.8066758 0.01736603 0.7404622 1.0000000
## s(time):tmin_lag1 0.9689610 0.02028509 0.9511435 0.8774920
## s(time):tmin_lag1
## para 0.3321669
## s(district) 0.3381185
## s(time):aet_lag1 0.9379661
## s(time):prcp_lag1 0.8497555
## s(time):tmin_lag1 1.0000000
Plots
tvcm_5_lag1_terms <- c(
"Actual Evapotranspiration (Lag = 1)",
"Precipitation (Lag = 1)",
"Min Temperature (Lag = 1)"
)
df_tvcm_5_lag1 <- get_tvcoef(
plot_vivax_tvcm_5_lag1,
plot_falciparum_tvcm_5_lag1,
tvcm_5_lag1_terms
)
plot_tvcm_by_terms(df_tvcm_5_lag1)
tictoc::tic("GAM model fitting")
vivax_tvcm_5_lag3 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re") +
s(time, by = aet_lag3, bs = "tp", k = 50) +
s(time, by = prcp_lag3, bs = "tp", k = 30) +
s(time, by = tmin_lag3, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.17 sec elapsed
cat("\nConverged?", vivax_tvcm_5_lag3$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_5_lag3)
##
## Family: Negative Binomial(0.844)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re") + s(time,
## by = aet_lag3, bs = "tp", k = 50) + s(time, by = prcp_lag3,
## bs = "tp", k = 30) + s(time, by = tmin_lag3, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.2658 0.3336 -21.78 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.840 48.000 306.518 < 2e-16 ***
## s(time):aet_lag3 11.350 13.611 5.150 < 2e-16 ***
## s(time):prcp_lag3 3.076 3.569 4.222 0.00533 **
## s(time):tmin_lag3 68.537 81.279 7.143 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.489 Deviance explained = 67.9%
## fREML = 14813 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_5_lag3, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] -1.865702e-08 -3.338955e-07 -8.264585e-09 -9.960136e-07
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 23.511996247 -0.013328885 -0.005484303 -0.2294394
## [2,] -0.013328885 1.793623635 -0.009030326 -0.2093404
## [3,] -0.005484303 -0.009030326 0.085106505 0.2407554
## [4,] -0.229439444 -0.209340353 0.240755368 15.4258222
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.84 NA NA
## s(time):aet_lag3 50.00 11.35 0.86 0.40
## s(time):prcp_lag3 30.00 3.08 0.86 0.35
## s(time):tmin_lag3 100.00 68.54 0.86 0.37
mgcv::plot.gam(
x = vivax_tvcm_5_lag3,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(vivax_tvcm_5_lag3)
## para s(district) s(time):aet_lag3 s(time):prcp_lag3 s(time):tmin_lag3
## worst 1 1.00000000 0.9810063 0.9410134 0.9909274
## observed 1 0.13019926 0.9524503 0.8867507 0.9365577
## estimate 1 0.04424801 0.9613851 0.8968900 0.9676146
mgcv::concurvity(vivax_tvcm_5_lag3, full = FALSE)$estimate
## para s(district) s(time):aet_lag3 s(time):prcp_lag3
## para 1.0000000 0.02040816 0.3288539 0.2950539
## s(district) 1.0000000 1.00000000 0.3300087 0.3073334
## s(time):aet_lag3 0.9663080 0.01990851 1.0000000 0.8053587
## s(time):prcp_lag3 0.8038787 0.01732113 0.7184329 1.0000000
## s(time):tmin_lag3 0.9688780 0.02028480 0.9495266 0.8744118
## s(time):tmin_lag3
## para 0.3324783
## s(district) 0.3385703
## s(time):aet_lag3 0.9382147
## s(time):prcp_lag3 0.8375486
## s(time):tmin_lag3 1.0000000
tictoc::tic("GAM model fitting")
falciparum_tvcm_5_lag3 <- mgcv::bam(
formula =
falciparum ~
offset(ln_pop2015) +
s(district, bs = "re") +
s(time, by = aet_lag3, bs = "tp", k = 50) +
s(time, by = prcp_lag3, bs = "tp", k = 30) +
s(time, by = tmin_lag3, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.12 sec elapsed
cat("\nConverged?", falciparum_tvcm_5_lag3$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_5_lag3)
##
## Family: Negative Binomial(0.502)
## Link function: log
##
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re") + s(time,
## by = aet_lag3, bs = "tp", k = 50) + s(time, by = prcp_lag3,
## bs = "tp", k = 30) + s(time, by = tmin_lag3, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.6774 0.3963 -21.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.572 48.000 186.424 < 2e-16 ***
## s(time):aet_lag3 7.063 8.364 12.027 < 2e-16 ***
## s(time):prcp_lag3 7.705 9.264 3.219 0.000561 ***
## s(time):tmin_lag3 52.876 64.661 4.865 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.374 Deviance explained = 64.8%
## fREML = 13746 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_5_lag3, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 2.149497e-08 1.389648e-07 1.036213e-07 8.450160e-07
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 22.8817615420 0.0007673148 -0.002532685 -0.21035931
## [2,] 0.0007673148 0.2331692112 0.060018827 0.03025332
## [3,] -0.0025326852 0.0600188275 0.638429428 -0.01355408
## [4,] -0.2103593129 0.0302533204 -0.013554075 6.75182477
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.57 NA NA
## s(time):aet_lag3 50.00 7.06 0.82 0.46
## s(time):prcp_lag3 30.00 7.70 0.82 0.43
## s(time):tmin_lag3 100.00 52.88 0.82 0.38
mgcv::plot.gam(
x = falciparum_tvcm_5_lag3,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(falciparum_tvcm_5_lag3)
## para s(district) s(time):aet_lag3 s(time):prcp_lag3 s(time):tmin_lag3
## worst 1 1.00000000 0.9810063 0.9410134 0.9909274
## observed 1 0.09225860 0.9583952 0.8787201 0.9647161
## estimate 1 0.04424801 0.9614316 0.8968682 0.9676456
mgcv::concurvity(falciparum_tvcm_5_lag3, full = FALSE)$estimate
## para s(district) s(time):aet_lag3 s(time):prcp_lag3
## para 1.0000000 0.02040816 0.3308493 0.2975150
## s(district) 1.0000000 1.00000000 0.3320115 0.3099293
## s(time):aet_lag3 0.9663080 0.01990851 1.0000000 0.8050513
## s(time):prcp_lag3 0.8038787 0.01732113 0.7183098 1.0000000
## s(time):tmin_lag3 0.9688780 0.02028480 0.9494990 0.8742286
## s(time):tmin_lag3
## para 0.3338450
## s(district) 0.3399601
## s(time):aet_lag3 0.9381868
## s(time):prcp_lag3 0.8374833
## s(time):tmin_lag3 1.0000000
Plots
tvcm_5_lag3_terms <- c(
"Actual Evapotranspiration (Lag = 3)",
"Precipitation (Lag = 3)",
"Min Temperature (Lag = 3)"
)
df_tvcm_5_lag3 <- get_tvcoef(
plot_vivax_tvcm_5_lag3,
plot_falciparum_tvcm_5_lag3,
tvcm_5_lag3_terms
)
plot_tvcm_by_terms(df_tvcm_5_lag3)
tictoc::tic("GAM model fitting")
vivax_tvcm_5_lag6 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re") +
s(time, by = aet_lag6, bs = "tp", k = 50) +
s(time, by = prcp_lag6, bs = "tp", k = 30) +
s(time, by = tmin_lag6, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.06 sec elapsed
cat("\nConverged?", vivax_tvcm_5_lag6$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_5_lag6)
##
## Family: Negative Binomial(0.851)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re") + s(time,
## by = aet_lag6, bs = "tp", k = 50) + s(time, by = prcp_lag6,
## bs = "tp", k = 30) + s(time, by = tmin_lag6, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.1630 0.3346 -21.41 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.84 49.00 302.781 < 2e-16 ***
## s(time):aet_lag6 14.79 17.67 7.402 < 2e-16 ***
## s(time):prcp_lag6 2.00 2.00 5.264 0.00519 **
## s(time):tmin_lag6 64.98 77.74 8.105 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.507 Deviance explained = 68.2%
## fREML = 14810 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_5_lag6, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] -5.321594e-07 3.465836e-07 -1.043507e-04 4.475891e-06
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 2.351450e+01 -3.703506e-02 5.273644e-07 -2.871711e-02
## [2,] -3.703506e-02 2.875807e+00 -1.588277e-07 4.874460e-01
## [3,] 5.273644e-07 -1.588277e-07 1.043293e-04 -4.184334e-06
## [4,] -2.871711e-02 4.874460e-01 -4.184334e-06 2.361061e+01
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.0 47.8 NA NA
## s(time):aet_lag6 50.0 14.8 0.87 0.46
## s(time):prcp_lag6 30.0 2.0 0.87 0.54
## s(time):tmin_lag6 100.0 65.0 0.87 0.42
mgcv::plot.gam(
x = vivax_tvcm_5_lag6,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(vivax_tvcm_5_lag6)
## para s(district) s(time):aet_lag6 s(time):prcp_lag6 s(time):tmin_lag6
## worst 1 1.00000000 0.9814022 0.9612554 0.9909056
## observed 1 0.13611775 0.9497681 0.8720685 0.9500013
## estimate 1 0.04413469 0.9623054 0.8983731 0.9677595
mgcv::concurvity(vivax_tvcm_5_lag6, full = FALSE)$estimate
## para s(district) s(time):aet_lag6 s(time):prcp_lag6
## para 1.0000000 0.02040816 0.3282686 0.2932652
## s(district) 1.0000000 1.00000000 0.3294415 0.3049702
## s(time):aet_lag6 0.9663219 0.01990864 1.0000000 0.8077621
## s(time):prcp_lag6 0.8050018 0.01732512 0.7312992 1.0000000
## s(time):tmin_lag6 0.9689491 0.02028989 0.9507838 0.8760512
## s(time):tmin_lag6
## para 0.3316525
## s(district) 0.3377419
## s(time):aet_lag6 0.9400951
## s(time):prcp_lag6 0.8355413
## s(time):tmin_lag6 1.0000000
tictoc::tic("GAM model fitting")
falciparum_tvcm_5_lag6 <- mgcv::bam(
formula =
falciparum ~
offset(ln_pop2015) +
s(district, bs = "re") +
s(time, by = aet_lag6, bs = "tp", k = 50) +
s(time, by = prcp_lag6, bs = "tp", k = 30) +
s(time, by = tmin_lag6, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.16 sec elapsed
cat("\nConverged?", falciparum_tvcm_5_lag6$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_5_lag6)
##
## Family: Negative Binomial(0.51)
## Link function: log
##
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re") + s(time,
## by = aet_lag6, bs = "tp", k = 50) + s(time, by = prcp_lag6,
## bs = "tp", k = 30) + s(time, by = tmin_lag6, bs = "tp", k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.7259 0.4014 -21.74 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.58 48.00 190.268 < 2e-16 ***
## s(time):aet_lag6 11.22 13.41 11.087 < 2e-16 ***
## s(time):prcp_lag6 2.00 2.00 10.164 3.94e-05 ***
## s(time):tmin_lag6 62.16 74.84 5.632 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.368 Deviance explained = 65.2%
## fREML = 13772 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_5_lag6, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 1.306886e-07 -1.588761e-06 -7.249868e-05 -4.170213e-06
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 2.287493e+01 -7.612699e-03 -1.388942e-07 -1.208812e-01
## [2,] -7.612699e-03 1.100414e+00 1.395135e-06 -1.687119e-01
## [3,] -1.388942e-07 1.395135e-06 7.248513e-05 3.602796e-06
## [4,] -1.208812e-01 -1.687119e-01 3.602796e-06 1.871457e+01
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.0 47.6 NA NA
## s(time):aet_lag6 50.0 11.2 0.82 0.56
## s(time):prcp_lag6 30.0 2.0 0.82 0.49
## s(time):tmin_lag6 100.0 62.2 0.82 0.48
mgcv::plot.gam(
x = falciparum_tvcm_5_lag6,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(falciparum_tvcm_5_lag6)
## para s(district) s(time):aet_lag6 s(time):prcp_lag6 s(time):tmin_lag6
## worst 1 1.00000000 0.9814022 0.9612554 0.9909056
## observed 1 0.09867811 0.9504558 0.8783491 0.9574366
## estimate 1 0.04413469 0.9623489 0.8983495 0.9677900
mgcv::concurvity(falciparum_tvcm_5_lag6, full = FALSE)$estimate
## para s(district) s(time):aet_lag6 s(time):prcp_lag6
## para 1.0000000 0.02040816 0.3302841 0.2957132
## s(district) 1.0000000 1.00000000 0.3314647 0.3075541
## s(time):aet_lag6 0.9663219 0.01990864 1.0000000 0.8074452
## s(time):prcp_lag6 0.8050018 0.01732512 0.7311948 1.0000000
## s(time):tmin_lag6 0.9689491 0.02028989 0.9507513 0.8758679
## s(time):tmin_lag6
## para 0.3330126
## s(district) 0.3391256
## s(time):aet_lag6 0.9400653
## s(time):prcp_lag6 0.8354854
## s(time):tmin_lag6 1.0000000
Plots
tvcm_5_lag6_terms <- c(
"Actual Evapotranspiration (Lag = 6)",
"Precipitation (Lag = 6)",
"Min Temperature (Lag = 6)"
)
df_tvcm_5_lag6 <- get_tvcoef(
plot_vivax_tvcm_5_lag6,
plot_falciparum_tvcm_5_lag6,
tvcm_5_lag6_terms
)
plot_tvcm_by_terms(df_tvcm_5_lag6)
tictoc::tic("GAM model fitting")
vivax_tvcm_5_lag12 <- mgcv::bam(
formula =
vivax ~
offset(ln_pop2015) +
s(district, bs = "re") +
s(time, by = aet_lag12, bs = "tp", k = 50) +
s(time, by = prcp_lag12, bs = "tp", k = 30) +
s(time, by = tmin_lag12, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 1.99 sec elapsed
cat("\nConverged?", vivax_tvcm_5_lag12$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(vivax_tvcm_5_lag12)
##
## Family: Negative Binomial(0.837)
## Link function: log
##
## Formula:
## vivax ~ offset(ln_pop2015) + s(district, bs = "re") + s(time,
## by = aet_lag12, bs = "tp", k = 50) + s(time, by = prcp_lag12,
## bs = "tp", k = 30) + s(time, by = tmin_lag12, bs = "tp",
## k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.6713 0.3347 -19.93 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.843 49.000 298.096 < 2e-16 ***
## s(time):aet_lag12 4.549 5.329 3.133 0.006480 **
## s(time):prcp_lag12 4.390 5.208 4.993 0.000128 ***
## s(time):tmin_lag12 67.194 79.923 9.681 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.495 Deviance explained = 67.7%
## fREML = 14792 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(vivax_tvcm_5_lag12, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 4.187093e-09 3.229654e-08 2.757584e-08 2.976799e-07
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 23.5132411919 0.0003037737 -0.004400675 -0.06419612
## [2,] 0.0003037737 0.5908829528 0.029270943 -0.09181087
## [3,] -0.0044006747 0.0292709425 0.639478867 -0.23036588
## [4,] -0.0641961208 -0.0918108674 -0.230365875 18.74634005
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.84 NA NA
## s(time):aet_lag12 50.00 4.55 0.85 0.065 .
## s(time):prcp_lag12 30.00 4.39 0.85 0.095 .
## s(time):tmin_lag12 100.00 67.19 0.85 0.095 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mgcv::plot.gam(
x = vivax_tvcm_5_lag12,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(vivax_tvcm_5_lag12)
## para s(district) s(time):aet_lag12 s(time):prcp_lag12
## worst 1 1.00000000 0.9812527 0.9509801
## observed 1 0.14363243 0.9687927 0.9029249
## estimate 1 0.04433167 0.9631260 0.9004555
## s(time):tmin_lag12
## worst 0.9908588
## observed 0.9372290
## estimate 0.9673412
mgcv::concurvity(vivax_tvcm_5_lag12, full = FALSE)$estimate
## para s(district) s(time):aet_lag12 s(time):prcp_lag12
## para 1.0000000 0.02040816 0.3223619 0.2949887
## s(district) 1.0000000 1.00000000 0.3235868 0.3075754
## s(time):aet_lag12 0.9661201 0.01991534 1.0000000 0.8024519
## s(time):prcp_lag12 0.8032460 0.01731527 0.7229075 1.0000000
## s(time):tmin_lag12 0.9690735 0.02028380 0.9520233 0.8768135
## s(time):tmin_lag12
## para 0.3285054
## s(district) 0.3343234
## s(time):aet_lag12 0.9396888
## s(time):prcp_lag12 0.8398721
## s(time):tmin_lag12 1.0000000
tictoc::tic("GAM model fitting")
falciparum_tvcm_5_lag12 <- mgcv::bam(
formula =
falciparum ~
offset(ln_pop2015) +
s(district, bs = "re") +
s(time, by = aet_lag12, bs = "tp", k = 50) +
s(time, by = prcp_lag12, bs = "tp", k = 30) +
s(time, by = tmin_lag12, bs = "tp", k = 100),
family = nb(),
data = dat,
method = "fREML",
discrete = TRUE
)
tictoc::toc()
## GAM model fitting: 2.29 sec elapsed
cat("\nConverged?", falciparum_tvcm_5_lag12$converged, "\n")
##
## Converged? TRUE
mgcv::summary.gam(falciparum_tvcm_5_lag12)
##
## Family: Negative Binomial(0.5)
## Link function: log
##
## Formula:
## falciparum ~ offset(ln_pop2015) + s(district, bs = "re") + s(time,
## by = aet_lag12, bs = "tp", k = 50) + s(time, by = prcp_lag12,
## bs = "tp", k = 30) + s(time, by = tmin_lag12, bs = "tp",
## k = 100)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.8236 0.3972 -19.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(district) 47.579 48.000 187.037 < 2e-16 ***
## s(time):aet_lag12 5.718 6.721 13.743 < 2e-16 ***
## s(time):prcp_lag12 7.341 8.829 4.212 3.06e-05 ***
## s(time):tmin_lag12 52.717 64.372 4.839 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.379 Deviance explained = 64.7%
## fREML = 13741 Scale est. = 1 n = 9996
par(mfrow = c(2, 2))
mgcv::gam.check(falciparum_tvcm_5_lag12, pch = 19, cex = 0.3)
##
## Method: fREML Optimizer: perf chol
## $grad
## [1] 6.931163e-09 2.483024e-08 2.627335e-08 1.651579e-07
##
## $hess
## [,1] [,2] [,3] [,4]
## [1,] 22.8592386407 -0.002396716 -0.0003409251 -0.06632696
## [2,] -0.0023967157 0.645925425 0.0063204595 -0.01323179
## [3,] -0.0003409251 0.006320460 1.0481055181 0.04622888
## [4,] -0.0663269569 -0.013231789 0.0462288764 10.17568908
##
## Model rank = 230 / 230
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(district) 49.00 47.58 NA NA
## s(time):aet_lag12 50.00 5.72 0.83 0.55
## s(time):prcp_lag12 30.00 7.34 0.83 0.57
## s(time):tmin_lag12 100.00 52.72 0.83 0.64
mgcv::plot.gam(
x = falciparum_tvcm_5_lag12,
n = 500,
seWithMean = TRUE,
scale = 0,
pages = 1
)
mgcv::concurvity(falciparum_tvcm_5_lag12)
## para s(district) s(time):aet_lag12 s(time):prcp_lag12
## worst 1 1.00000000 0.9812527 0.9509801
## observed 1 0.10756223 0.9657721 0.8938823
## estimate 1 0.04433167 0.9631651 0.9004515
## s(time):tmin_lag12
## worst 0.9908588
## observed 0.9478304
## estimate 0.9673760
mgcv::concurvity(falciparum_tvcm_5_lag12, full = FALSE)$estimate
## para s(district) s(time):aet_lag12 s(time):prcp_lag12
## para 1.0000000 0.02040816 0.3242936 0.2974126
## s(district) 1.0000000 1.00000000 0.3255266 0.3101337
## s(time):aet_lag12 0.9661201 0.01991534 1.0000000 0.8022129
## s(time):prcp_lag12 0.8032460 0.01731527 0.7228119 1.0000000
## s(time):tmin_lag12 0.9690735 0.02028380 0.9519893 0.8766634
## s(time):tmin_lag12
## para 0.3299275
## s(district) 0.3357694
## s(time):aet_lag12 0.9396622
## s(time):prcp_lag12 0.8398213
## s(time):tmin_lag12 1.0000000
Plots
tvcm_5_lag12_terms <- c(
"Actual Evapotranspiration (Lag = 12)",
"Precipitation (Lag = 12)",
"Min Temperature (Lag = 12)"
)
df_tvcm_5_lag12 <- get_tvcoef(
plot_vivax_tvcm_5_lag12,
plot_falciparum_tvcm_5_lag12,
tvcm_5_lag12_terms
)
plot_tvcm_by_terms(df_tvcm_5_lag12)
df_malaria_loreto %>%
group_by(district) %>%
summarise(
vivax_zeros = 100 * (sum(vivax < 1)/n()),
falciparum_zeros = 100 * (sum(falciparum < 1)/n())
) %>%
arrange(desc(vivax_zeros))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 49 x 3
## district vivax_zeros falciparum_zeros
## <fct> <dbl> <dbl>
## 1 PADRE MARQUEZ 97.7 99.1
## 2 INAHUAYA 96.8 99.5
## 3 PAMPA HERMOSA 94.0 99.5
## 4 VARGAS GUERRA 91.2 98.1
## 5 PUTUMAYO 83.3 84.7
## 6 CAPELO 75.5 97.7
## 7 PUINAHUA 72.7 94.9
## 8 SARAYACU 70.8 93.5
## 9 CONTAMANA 69.4 94.9
## 10 JENARO HERRERA 65.3 95.8
## # ... with 39 more rows
tvcm_lag0_terms <- c(
"Actual Evapotranspiration (lag = 0)",
"Precipitation (lag = 0)",
"Min Temperature (lag = 0)"
)
df_tvcm_lag0 <- get_tvcoef(
plot_vivax_tvcm_lag0,
plot_falciparum_tvcm_lag0,
tvcm_lag0_terms
)
tvcm_lag1_terms <- c(
"Actual Evapotranspiration (lag = 1)",
"Precipitation (lag = 1)",
"Min Temperature (lag = 1)"
)
df_tvcm_lag1 <- get_tvcoef(
plot_vivax_tvcm_lag1,
plot_falciparum_tvcm_lag1,
tvcm_lag1_terms
)
tvcm_lag3_terms <- c(
"Actual Evapotranspiration (lag = 3)",
"Precipitation (lag = 3)",
"Min Temperature (lag = 3)"
)
df_tvcm_lag3 <- get_tvcoef(
plot_vivax_tvcm_lag3,
plot_falciparum_tvcm_lag3,
tvcm_lag3_terms
)
tvcm_lag6_terms <- c(
"Actual Evapotranspiration (lag = 6)",
"Precipitation (lag = 6)",
"Min Temperature (lag = 6)"
)
df_tvcm_lag6 <- get_tvcoef(
plot_vivax_tvcm_lag6,
plot_falciparum_tvcm_lag6,
tvcm_lag6_terms
)
tvcm_lag12_terms <- c(
"Actual Evapotranspiration (lag = 12)",
"Precipitation (lag = 12)",
"Min Temperature (lag = 12)"
)
df_tvcm_lag12 <- get_tvcoef(
plot_vivax_tvcm_lag12,
plot_falciparum_tvcm_lag12,
tvcm_lag12_terms
)
df_tvcm_lagged <- rbind(
df_tvcm_lag0,
df_tvcm_lag1,
df_tvcm_lag3,
df_tvcm_lag6,
df_tvcm_lag12
)
df_tvcm_lagged
## # A tibble: 15,000 x 5
## x se fit[,1] term specie
## <dttm> <dbl> <dbl> <fct> <fct>
## 1 2001-01-01 00:00:00 0.939 -0.883 Actual Evapotranspiration (lag = 0) P. viv~
## 2 2001-01-13 09:08:17 0.924 -0.871 Actual Evapotranspiration (lag = 0) P. viv~
## 3 2001-01-25 18:16:35 0.909 -0.859 Actual Evapotranspiration (lag = 0) P. viv~
## 4 2001-02-07 03:24:53 0.895 -0.847 Actual Evapotranspiration (lag = 0) P. viv~
## 5 2001-02-19 12:33:11 0.881 -0.835 Actual Evapotranspiration (lag = 0) P. viv~
## 6 2001-03-03 21:41:28 0.867 -0.823 Actual Evapotranspiration (lag = 0) P. viv~
## 7 2001-03-16 06:49:46 0.853 -0.811 Actual Evapotranspiration (lag = 0) P. viv~
## 8 2001-03-28 15:58:04 0.840 -0.799 Actual Evapotranspiration (lag = 0) P. viv~
## 9 2001-04-10 01:06:22 0.827 -0.787 Actual Evapotranspiration (lag = 0) P. viv~
## 10 2001-04-22 10:14:40 0.814 -0.775 Actual Evapotranspiration (lag = 0) P. viv~
## # ... with 14,990 more rows
df_tvcm_lagged$variable <- factor(
stringr::str_extract(df_tvcm_lagged$term, ".*(?=\\s\\()"),
levels = c("Actual Evapotranspiration", "Precipitation", "Min Temperature")
)
df_tvcm_lagged$lag <- factor(
stringr::str_extract(df_tvcm_lagged$term, "(?<=\\().*(?=\\))"),
levels = c("lag = 0", "lag = 1", "lag = 3", "lag = 6", "lag = 12")
)
df_tvcm_lagged %>%
ggplot2::ggplot() +
facet_grid(cols = vars(specie), rows = vars(variable), scales = "free") +
geom_line(mapping = aes(x = x, y = fit, color = lag), size = 1) +
geom_ribbon(
mapping = aes(
x = x,
y = fit,
ymin = fit + se,
ymax = fit - se,
fill = lag
),
alpha = 0.1
) +
geom_rect(
data = df_pamafro_period,
mapping = aes(xmin = start, xmax = end),
ymin = -60,
ymax = 60,
alpha = 0.2
) +
geom_hline(mapping = aes(yintercept = 0), alpha = 0.6) +
geom_vline(
xintercept = c(df_pamafro_period$start, df_pamafro_period$end),
linetype = 'dashed',
alpha = 0.8
) +
scale_x_datetime(date_labels = "%Y", date_breaks = "2 year") +
ggsci::scale_color_npg() +
ggsci::scale_fill_npg() +
ylab("b(t)") +
xlab(NULL) +
theme(legend.position = "top", legend.title = element_blank())
AIC(
vivax_tvcm_5,
vivax_tvcm_5_lag1,
vivax_tvcm_5_lag3,
vivax_tvcm_5_lag6,
vivax_tvcm_5_lag12
) -> vivax_tvcm_5_aic
Vivax (AIC: 70783.24, DF: 130.3846) (AIC: 70689.94, DF: 133.4880)
vivax_tvcm_5_aic
## df AIC
## vivax_tvcm_5 130.3846 70783.24
## vivax_tvcm_5_lag1 128.3019 70755.05
## vivax_tvcm_5_lag3 137.4845 70763.09
## vivax_tvcm_5_lag6 133.4880 70689.94
## vivax_tvcm_5_lag12 128.8622 70807.74
readr::write_csv(vivax_tvcm_5_aic, "eval_vivax_lagged_tvcm.csv")
AIC(
falciparum_tvcm_5,
falciparum_tvcm_5_lag1,
falciparum_tvcm_5_lag3,
falciparum_tvcm_5_lag6,
falciparum_tvcm_5_lag12
) -> falciparum_tvcm_5_aic
Falciparum (AIC: 46213.92, DF: 122.0533) (AIC: 46137.46, DF: 127.7910)
falciparum_tvcm_5_aic
## df AIC
## falciparum_tvcm_5 122.0533 46213.92
## falciparum_tvcm_5_lag1 124.5639 46174.96
## falciparum_tvcm_5_lag3 123.0266 46216.87
## falciparum_tvcm_5_lag6 127.7910 46137.46
## falciparum_tvcm_5_lag12 118.5578 46221.94
readr::write_csv(falciparum_tvcm_5_aic, "eval_falciparum_lagged_tvcm.csv")